From 978e3cf52b0489fc678f84b1e7fd4849bd5e9d15 Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Tue, 4 May 2021 16:40:56 -0400 Subject: [PATCH 1/7] WIP: S3 support for spock config files. Currently loading is implemented and passes local testing (unit tests to come via moto). Still need to implement saving (aka uploading to S3). Signed-off-by: Nicholas Cilfone --- .gitignore | 4 + setup.py | 3 +- spock/__init__.py | 3 +- spock/backend/__init__.py | 2 +- spock/backend/attr/payload.py | 6 +- spock/backend/attr/saver.py | 6 +- spock/backend/base.py | 69 ++++++++++------ spock/backend/s3/__init__.py | 12 +++ spock/backend/s3/utils.py | 131 ++++++++++++++++++++++++++++++ spock/builder.py | 15 ++-- spock/handlers.py | 65 +++++++++++++-- spock/utils.py | 19 +++++ tests/attr_tests/test_all_attr.py | 2 +- tests/debug/debug.py | 123 ---------------------------- tests/debug/debug.yaml | 83 ------------------- tests/debug/debug_inherit.yaml | 5 -- tests/debug/more_inherit.yaml | 2 - tests/debug/most_inherit.yaml | 2 - tests/debug/params/__init__.py | 1 - tests/debug/params/first.py | 97 ---------------------- tests/debug/params/second.py | 14 ---- 21 files changed, 288 insertions(+), 376 deletions(-) create mode 100644 spock/backend/s3/__init__.py create mode 100644 spock/backend/s3/utils.py delete mode 100644 tests/debug/debug.py delete mode 100644 tests/debug/debug.yaml delete mode 100644 tests/debug/debug_inherit.yaml delete mode 100644 tests/debug/more_inherit.yaml delete mode 100644 tests/debug/most_inherit.yaml delete mode 100644 tests/debug/params/__init__.py delete mode 100644 tests/debug/params/first.py delete mode 100644 tests/debug/params/second.py diff --git a/.gitignore b/.gitignore index 47bd7720..00d97224 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ # Created by .ignore support plugin (hsz.mobi) ### Python template + +# Debugging folder +debug/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/setup.py b/setup.py index 
929e9629..f6ef0a49 100644 --- a/setup.py +++ b/setup.py @@ -44,5 +44,6 @@ keywords=['configuration', 'argparse', 'parameters', 'machine learning', 'deep learning', 'reproducibility'], packages=setuptools.find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), python_requires='>=3.6', - install_requires=install_reqs + install_requires=install_reqs, + extras_require={'s3': ['boto3', 'botocore', 'hurry.filesize']} ) diff --git a/spock/__init__.py b/spock/__init__.py index 0ea44bea..9faa0ddf 100644 --- a/spock/__init__.py +++ b/spock/__init__.py @@ -10,8 +10,9 @@ """ from spock._version import get_versions +from spock.backend.s3.utils import S3Config -__all__ = ["args", "builder", "config"] +__all__ = ["args", "builder", "config", "S3Config"] __version__ = get_versions()['version'] del get_versions \ No newline at end of file diff --git a/spock/backend/__init__.py b/spock/backend/__init__.py index d8767c44..7ec9249d 100644 --- a/spock/backend/__init__.py +++ b/spock/backend/__init__.py @@ -8,4 +8,4 @@ Please refer to the documentation provided in the README.md """ -__all__ = ["attr", "base"] +__all__ = ["attr", "base", "s3"] diff --git a/spock/backend/attr/payload.py b/spock/backend/attr/payload.py index adb8cdd8..4c8c6dd0 100644 --- a/spock/backend/attr/payload.py +++ b/spock/backend/attr/payload.py @@ -23,8 +23,8 @@ class AttrPayload(BasePayload): _loaders: maps of each file extension to the loader class """ - def __init__(self): - super().__init__() + def __init__(self, s3_config=None): + super().__init__(s3_config=s3_config) def __call__(self, *args, **kwargs): """Call to allow self chaining @@ -39,7 +39,7 @@ def __call__(self, *args, **kwargs): Payload: instance of self """ - return AttrPayload() + return AttrPayload(*args, **kwargs) @staticmethod def _update_payload(base_payload, input_classes, payload): diff --git a/spock/backend/attr/saver.py b/spock/backend/attr/saver.py index 3df6e73f..a0c45d06 100644 --- a/spock/backend/attr/saver.py +++ 
b/spock/backend/attr/saver.py @@ -20,11 +20,11 @@ class AttrSaver(BaseSaver): _writers: maps file extension to the correct i/o handler """ - def __init__(self): - super().__init__() + def __init__(self, s3_config=None): + super().__init__(s3_config=s3_config) def __call__(self, *args, **kwargs): - return AttrSaver() + return AttrSaver(*args, **kwargs) def _clean_up_values(self, payload, file_extension): # Dictionary to recursively write to diff --git a/spock/backend/base.py b/spock/backend/base.py index a3c1604f..64ff822e 100644 --- a/spock/backend/base.py +++ b/spock/backend/base.py @@ -21,6 +21,7 @@ from spock.handlers import TOMLHandler from spock.handlers import YAMLHandler from spock.utils import add_info +from spock.utils import check_path_s3 from spock.utils import make_argument from typing import List @@ -40,7 +41,26 @@ def __repr__(self): return yaml.dump(self.__dict__, default_flow_style=False) -class BaseSaver(ABC): # pylint: disable=too-few-public-methods +class BaseHandler(ABC): + """Base class for saver and payload + + *Attributes*: + + _writers: maps file extension to the correct i/o handler + _s3_config: optional S3Config object to handle s3 access + + """ + def __init__(self, s3_config=None): + self._supported_extensions = {'.yaml': YAMLHandler, '.toml': TOMLHandler, '.json': JSONHandler} + self._s3_config = s3_config + + def _check_extension(self, file_extension: str): + if file_extension not in self._supported_extensions: + raise TypeError(f'File extension {file_extension} not supported -- \n' + f'File extension must be from {list(self._supported_extensions.keys())}') + + +class BaseSaver(BaseHandler): # pylint: disable=too-few-public-methods """Base class for saving configs Contains methods to build a correct output payload and then writes to file based on the file @@ -49,10 +69,11 @@ class BaseSaver(ABC): # pylint: disable=too-few-public-methods *Attributes*: _writers: maps file extension to the correct i/o handler + _s3_config: optional 
S3Config object to handle s3 access """ - def __init__(self): - self._writers = {'.yaml': YAMLHandler, '.toml': TOMLHandler, '.json': JSONHandler} + def __init__(self, s3_config=None): + super(BaseSaver, self).__init__(s3_config=s3_config) def save(self, payload, path, file_name=None, create_save_path=False, extra_info=True, file_extension='.yaml'): #pylint: disable=too-many-arguments """Writes Spock config to file @@ -74,9 +95,8 @@ def save(self, payload, path, file_name=None, create_save_path=False, extra_info None """ - supported_extensions = list(self._writers.keys()) - if file_extension not in self._writers: - raise ValueError(f'Invalid fileout extension. Expected a fileout from {supported_extensions}') + # Check extension + self._check_extension(file_extension=file_extension) # Make the filename fname = str(uuid1()) if file_name is None else file_name name = f'{fname}.spock.cfg{file_extension}' @@ -89,7 +109,7 @@ def save(self, payload, path, file_name=None, create_save_path=False, extra_info if not os.path.exists(path) and create_save_path: os.makedirs(path) with open(fid, 'w') as file_out: - self._writers.get(file_extension)().save(out_dict, extra_dict, file_out) + self._supported_extensions.get(file_extension)().save(out_dict, extra_dict, file_out) except OSError as e: print(f'Not a valid file path to write to: {fid}') raise e @@ -672,19 +692,20 @@ def _get_from_sys_modules(cls_name): return module -class BasePayload(ABC): # pylint: disable=too-few-public-methods +class BasePayload(BaseHandler): # pylint: disable=too-few-public-methods """Handles building the payload for config file(s) This class builds out the payload from config files of multiple types. 
It handles various - file types and also composition of config files via a recursive calls + file types and also composition of config files via recursive calls *Attributes*: _loaders: maps of each file extension to the loader class + __s3_config: optional S3Config object to handle s3 access """ - def __init__(self): - self._loaders = {'.yaml': YAMLHandler(), '.toml': TOMLHandler(), '.json': JSONHandler()} + def __init__(self, s3_config=None): + super(BasePayload, self).__init__(s3_config=s3_config) @staticmethod @abstractmethod @@ -745,12 +766,10 @@ def _payload(self, input_classes, path, deps, root=False): """ # Match to loader based on file-extension config_extension = Path(path).suffix.lower() - supported_extensions = list(self._loaders.keys()) - if config_extension not in supported_extensions: - raise TypeError(f'File extension {config_extension} not supported\n' - f'Must be from {supported_extensions}') + # Verify extension + self._check_extension(file_extension=config_extension) # Load from file - base_payload = self._loaders.get(config_extension).load(path) + base_payload = self._supported_extensions.get(config_extension)().load(path, s3_config=self._s3_config) # Check and? update the dependencies deps = self._handle_dependencies(deps, path, root) payload = {} @@ -796,7 +815,8 @@ def _handle_includes(self, base_payload, config_extension, input_classes, path, """Handles config composition For all of the config tags in the config file this function will recursively call the payload function - with the composition path to get the additional payload(s) from the composed file(s) + with the composition path to get the additional payload(s) from the composed file(s) -- checks for file + validity or if it is an S3 URI via regex *Args*: @@ -814,14 +834,15 @@ def _handle_includes(self, base_payload, config_extension, input_classes, path, """ included_params = {} for inc_path in base_payload['config']: - if not os.path.exists(inc_path): - # maybe it's relative? 
- abs_inc_path = os.path.join(os.path.dirname(path), inc_path) + if check_path_s3(inc_path): + use_path = inc_path + elif os.path.exists(inc_path): + use_path = inc_path + elif os.path.join(os.path.dirname(path), inc_path): + use_path = os.path.join(os.path.dirname(path), inc_path) else: - abs_inc_path = inc_path - if not os.path.exists(abs_inc_path): - raise RuntimeError(f'Could not find included {config_extension} file {inc_path}!') - included_params.update(self._payload(input_classes, abs_inc_path, deps)) + raise RuntimeError(f'Could not find included {config_extension} file {inc_path} or is not an S3 URI!') + included_params.update(self._payload(input_classes, use_path, deps)) payload.update(included_params) return payload diff --git a/spock/backend/s3/__init__.py b/spock/backend/s3/__init__.py new file mode 100644 index 00000000..6927bcbe --- /dev/null +++ b/spock/backend/s3/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 FMR LLC +# SPDX-License-Identifier: Apache-2.0 + +""" +Spock is a framework that helps manage complex parameter configurations for Python applications + +Please refer to the documentation provided in the README.md +""" + +__all__ = ["utils"] diff --git a/spock/backend/s3/utils.py b/spock/backend/s3/utils.py new file mode 100644 index 00000000..262c3d4b --- /dev/null +++ b/spock/backend/s3/utils.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 FMR LLC +# SPDX-License-Identifier: Apache-2.0 + +"""Handles all S3 related ops -- allows for s3 functionality to be optional to keep req deps light""" + +import attr +try: + import boto3 + from botocore.client import BaseClient +except ImportError: + print('Missing libraries to support S3 functionality. 
Please re-install spock with the extra s3 dependencies -- ' + 'pip install spock-config[s3]') +from hurry.filesize import size +import os +from urllib.parse import urlparse +import re +import sys +import typing + + +@attr.s(auto_attribs=True) +class S3Config: + """Configuration class for S3 support + + *Attributes*: + + session: instantiated boto3 session object + s3_session: automatically generated s3 client from the boto3 session + kms_arn: AWS KMS key ARN (optional) + temp_folder: temporary working folder to write/read spock configuration(s) (optional: defaults to /tmp) + + """ + session: boto3.Session + s3_session: BaseClient = attr.ib(init=False) + kms_arn: typing.Optional[str] = None + temp_folder: typing.Optional[str] = '/tmp/' + + def __attrs_post_init__(self): + self.s3_session = self.session.client('s3') + + +def handle_s3_load_path(path: str, s3_config: S3Config) -> str: + """Handles loading from S3 uri + + Handles downloading file from a given s3 uri to a local temp location and passing the path back to the handler + load call + + *Args*: + + path: s3 uri path + s3_config: s3_config object + + *Returns*: + + temp_path: the temporary path of the config file downloaded from s3 + + """ + if s3_config is None: + raise ValueError('Missing S3Config object which is necessary to handle S3 style paths') + bucket, obj, fid = get_s3_bucket_object_name(s3_path=path) + # Construct the full temp path + temp_path = f'{s3_config.temp_folder}/{fid}' + # Strip double slashes if exist + temp_path = temp_path.replace(r'//', r'/') + temp_path = download_s3(bucket=bucket, obj=obj, temp_path=temp_path, s3_session=s3_config.s3_session) + return temp_path + + +def get_s3_bucket_object_name(s3_path: str) -> typing.Tuple[str, str, str]: + """Splits a S3 uri into bucket, object, name + + *Args*: + + s3_path: s3 uri + + *Returns*: + + bucket + object + name + + """ + parsed = urlparse(s3_path) + return parsed.netloc, parsed.path.lstrip('/'), os.path.basename(parsed.path) + + +def 
download_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient) -> str: + """Attempts to download the file from the S3 uri to a temp location + + *Args*: + + bucket: s3 bucket + obj: s3 object + temp_path: local temporary path to write file + s3_session: current s3 session + + *Returns*: + + temp_path: the temporary path of the config file downloaded from s3 + + """ + try: + file_size = s3_session.head_object(Bucket=bucket, Key=obj)['ContentLength'] + print(f'Attempting to download s3://{bucket}/{obj} (size: {size(file_size)})') + current_progress = 0 + n_ticks = 50 + + def _s3_progress_bar(chunk): + nonlocal current_progress + # Increment progress + current_progress += chunk + done = int(n_ticks * (current_progress / file_size)) + sys.stdout.write(f"\r[%s%s] " + f"{int(current_progress/file_size) * 100}%%" % ('=' * done, ' ' * (n_ticks - done))) + sys.stdout.flush() + sys.stdout.write('\n\n') + # Download with the progress callback + s3_session.download_file(bucket, obj, temp_path, Callback=_s3_progress_bar) + return temp_path + except IOError: + print(f'Failed to download file from S3 ' + f'(bucket: {bucket}, object: {obj}) ' + f'and write to {temp_path}') + + +def upload_s3(self): + # Here it should upload to S3 from the written path (/tmp?) + # How to manage KMS or if file is encrypted? Config obj? 
Would the session have it already + pass \ No newline at end of file diff --git a/spock/builder.py b/spock/builder.py index f53ac857..d6782064 100644 --- a/spock/builder.py +++ b/spock/builder.py @@ -12,6 +12,7 @@ from spock.backend.attr.saver import AttrSaver from spock.utils import check_payload_overwrite from spock.utils import deep_payload_update +import typing class ConfigArgBuilder: @@ -32,13 +33,14 @@ class ConfigArgBuilder: _saver_obj: instance of a BaseSaver class """ - def __init__(self, *args, configs=None, create_save_path=False, desc='', no_cmd_line=False, **kwargs): + def __init__(self, *args, configs: typing.Optional[typing.List] = None, create_save_path: bool = False, + desc: str = '', no_cmd_line: bool = False, s3_config=None, **kwargs): backend = self._set_backend(args) self._create_save_path = create_save_path self._builder_obj = backend.get('builder')( *args, configs=configs, create_save_path=create_save_path, desc=desc, no_cmd_line=no_cmd_line, **kwargs) - self._payload_obj = backend.get('payload') - self._saver_obj = backend.get('saver')() + self._payload_obj = backend.get('payload')(s3_config=s3_config) + self._saver_obj = backend.get('saver')(s3_config=s3_config) try: self._dict_args = self._get_payload() self._arg_namespace = self._builder_obj.generate(self._dict_args) @@ -74,7 +76,7 @@ def generate(self): return self._arg_namespace @staticmethod - def _set_backend(args): + def _set_backend(args: typing.List): """Determines which backend class to use *Args*: @@ -133,12 +135,13 @@ def _get_payload(self): payload = {} dependencies = {'paths': [], 'rel_paths': [], 'roots': []} for configs in args.config: - payload_update = self._payload_obj().payload(self._builder_obj.input_classes, configs, args, dependencies) + payload_update = self._payload_obj.payload(self._builder_obj.input_classes, configs, args, dependencies) check_payload_overwrite(payload, payload_update, configs) deep_payload_update(payload, payload_update) return payload - def 
save(self, file_name=None, user_specified_path=None, extra_info=True, file_extension='.yaml'): + def save(self, file_name: str = None, user_specified_path: str = None, extra_info: bool = True, + file_extension: str = '.yaml'): """Saves the current config setup to file with a UUID *Args*: diff --git a/spock/handlers.py b/spock/handlers.py index a767543c..3d0d03dc 100644 --- a/spock/handlers.py +++ b/spock/handlers.py @@ -10,7 +10,9 @@ import json import re from spock import __version__ +from spock.utils import check_path_s3 import toml +import typing from warnings import warn import yaml @@ -21,22 +23,41 @@ class Handler(ABC): ABC for loaders """ - @abstractmethod - def load(self, path): + def load(self, path: str, s3_config=None) -> typing.Dict: """Load function for file type + This handles s3 path conversion for all handler types pre load call + + *Args*: + + path: path to file + s3_config: optional s3 config object if using s3 storage + + *Returns*: + + dictionary of read file + + """ + path = self._handle_possible_s3_load_path(path=path, s3_config=s3_config) + return self._load(path=path) + + @abstractmethod + def _load(self, path: str) -> typing.Dict: + """Private load function for file type + *Args*: path: path to file *Returns*: + dictionary of read file """ raise NotImplementedError @abstractmethod - def save(self, out_dict, info_dict, path): + def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): """Write function for file type *Args*: @@ -50,6 +71,32 @@ def save(self, out_dict, info_dict, path): """ raise NotImplementedError + @staticmethod + def _handle_possible_s3_load_path(path: str, s3_config=None) -> str: + """Handles the possibility of having to handle a S3 file + + Checks to see if it detects a S3 uri and if so triggers imports of s3 functionality and handles the file + download + + *Args*: + + path: spock config path + s3_config: optional s3 configuration object + + *Returns*: + + path: current 
path for the configuration file + + """ + is_s3 = check_path_s3(path=path) + if is_s3: + try: + from spock.backend.s3.utils import handle_s3_load_path + path = handle_s3_load_path(path=path, s3_config=s3_config) + except ImportError: + print('Error importing s3 utils after detecting s3:// path') + return path + @staticmethod def write_extra_info(path, info_dict): """Writes extra info to commented newlines @@ -94,7 +141,7 @@ class YAMLHandler(Handler): list(u'-+0123456789.') ) - def load(self, path): + def _load(self, path: str) -> typing.Dict: """YAML load function *Args*: @@ -111,7 +158,7 @@ def load(self, path): base_payload = yaml.safe_load(file_contents) return base_payload - def save(self, out_dict, info_dict, path): + def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): """Write function for YAML type *Args*: @@ -137,7 +184,7 @@ class TOMLHandler(Handler): Base TOML class """ - def load(self, path): + def _load(self, path: str) -> typing.Dict: """TOML load function *Args*: @@ -152,7 +199,7 @@ def load(self, path): base_payload = toml.load(path) return base_payload - def save(self, out_dict, info_dict, path): + def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): """Write function for TOML type *Args*: @@ -176,7 +223,7 @@ class JSONHandler(Handler): Base JSON class """ - def load(self, path): + def _load(self, path: str) -> typing.Dict: """JSON load function *Args*: @@ -192,7 +239,7 @@ def load(self, path): base_payload = json.load(json_fid) return base_payload - def save(self, out_dict, info_dict, path): + def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): """Write function for JSON type *Args*: diff --git a/spock/utils.py b/spock/utils.py index 1b719ac4..aae66403 100644 --- a/spock/utils.py +++ b/spock/utils.py @@ -9,6 +9,7 @@ import attr from enum import EnumMeta import os +import re import 
socket import subprocess import sys @@ -23,6 +24,24 @@ from typing import _GenericAlias +def check_path_s3(path: str) -> bool: + """Checks the given path to see if it matches the s3:// regex + + *Args*: + + path: a spock config path + + *Returns*: + + boolean of regex match + + """ + # Make a case insensitive s3 regex + s3_regex = re.compile(r'(?i)^s3://').search(path) + # If it returns an object then the path is an s3 style reference + return s3_regex is not None + + def _is_spock_instance(__obj: object): """Checks if the object is a @spock decorated class diff --git a/tests/attr_tests/test_all_attr.py b/tests/attr_tests/test_all_attr.py index 35816d9b..bcc8d1f4 100644 --- a/tests/attr_tests/test_all_attr.py +++ b/tests/attr_tests/test_all_attr.py @@ -648,7 +648,7 @@ def test_yaml_invalid_extension(self, monkeypatch, tmp_path): './tests/conf/yaml/test.yaml']) config = ConfigArgBuilder(TypeConfig, NestedStuff, NestedListStuff, TypeOptConfig, desc='Test Builder') # Test the chained version - with pytest.raises(ValueError): + with pytest.raises(TypeError): config.save(user_specified_path=f'{str(tmp_path)}/foo.bar/fizz.buzz/', file_extension='.foo').generate() diff --git a/tests/debug/debug.py b/tests/debug/debug.py deleted file mode 100644 index 6460b5b2..00000000 --- a/tests/debug/debug.py +++ /dev/null @@ -1,123 +0,0 @@ -# -*- coding: utf-8 -*- -from spock.config import spock -from typing import List -from typing import Optional -from typing import Tuple -from enum import Enum -from spock.builder import ConfigArgBuilder -from spock.config import isinstance_spock -from params.first import Test -from params.first import NestedListStuff -from spock.backend.attr.typed import SavePath -import pickle -from argparse import Namespace - - -# class Choice(Enum): -# pear = 'pear' -# banana = 'banana' -# -# -# class IntChoice(Enum): -# option_1 = 10 -# option_2 = 20 -# -# -# @spock -# class OtherStuff: -# """Other stuff class -# -# Attributes: -# three: heahadsf -# four: 
asdfjhasdlkf -# -# """ -# three: int -# four: str -# -# -# @spock -# class Stuff: -# """Stuff class -# -# Attributes: -# one: help -# two: teadsfa -# -# """ -# one: int -# two: str -# -# -# class ClassStuff(Enum): -# """Class enum -# -# Attributes: -# other_stuff: OtherStuff class -# stuff: Stuff class -# -# """ -# other_stuff = OtherStuff -# stuff = Stuff - -# -# @spock -# class RepeatStuff: -# hi: int -# bye: float - - -# @spock -# class Test: - # new_choice: Choice - # # fix_me: Tuple[Tuple[int]] - # new: int = 3 - # fail: bool - # fail: Tuple[Tuple[int, int], Tuple[int, int]] - # test: List[int] = [1, 2] - # fail: List[List[int]] = [[1, 2], [1, 2]] - # borken: Stuff = Stuff - # borken: List[RepeatStuff] - # more_borken: OtherStuff - # most_broken: ClassStuff - # borken: int - # borken: List[List[Choice]] = [['pear'], ['banana']] - # save_path: SavePath = '/tmp' - # other: Optional[int] - # value: Optional[List[int]] = [1, 2] - - -# @spock -# class Test2: -# new: int -# other: int -# fail: int -# foo: str - -# -# @spock -# class Test3(Test2, Test): -# ccccombo_breaker: int - - - - -def main(): - attrs_class = ConfigArgBuilder( - Test, NestedListStuff, - desc='I am a description' - ).save(user_specified_path='/tmp', file_extension='.foo').generate() - # with open('/tmp/debug.pickle', 'wb') as fid: - # pickle.dump(attrs_class, file=fid) - - # with open('/tmp/debug.pickle', 'rb') as fid: - # attrs_load = pickle.load(fid) - # attrs_class = ConfigArgBuilder(Test, Test2).generate() - print(attrs_class) - # print(attrs_load) - # dc_class = ConfigArgBuilder(OldInherit).generate() - # print(dc_class) - - -if __name__ == '__main__': - main() diff --git a/tests/debug/debug.yaml b/tests/debug/debug.yaml deleted file mode 100644 index 42c6ce41..00000000 --- a/tests/debug/debug.yaml +++ /dev/null @@ -1,83 +0,0 @@ -#other: 1 -#new_other: 1 -#config: [debug_inherit.yaml] -#fail: [[1, 2], [2, 1]] -###value: [1, 2] -###fix_me: [[1, 2], [2, 1]] -###Test: -### fix_me: [[11, 2], 
[22, 1]] -##new_choice: pear -##borken: Stuff -#Stuff: -# one: 10 -# two: hello -## - one: 10 -## two: hello -## - one: 20 -## two: bye -# -##more_borken: OtherStuff -#OtherStuff: -# three: 20 -# four: ciao -####Test2: -#### other: 12 -####Test: -#### new: 12 -# -#RepeatStuff: -# - hi: 10 -# bye: 30.0 -# - hi: 20 -# bye: 45.0 - -#one: 18 -#fail: [[1, 2], [3, 4]] -#flipper: True -#nested_list: NestedListStuff -#test: [ 1, 2 ] - -config: [foo.yaml] - -help: hello - - -Test: - test: [ 1, 2 ] - fail: [ [ 1, 2 ], [ 3, 4 ] ] -# most_broken: Stuff - one: 18 -# flipper: false - nested_list: NestedListStuff -# ummm: 10 -# does_not_exist: 10 -# new_choice: pear - -NestedListStuff: - - maybe: 10 - more: hello - - maybe: 20 - more: bye -# borken: RepeatStuff - -##ccccombo_breaker: 10 -#new: 10 -##other: 1 -##Test2: -## fail: 2 -## other: 3 -###fail: 1 -#test: [1, 2] - -#most_broken: OtherStuff - -#save_path: /tmp -#new_choice: pear -#borken: [[pear], [pear, banana]] -#Test2: -# other: 12 -#Test: -# other: 12 -#ccccombo_breaker: 10 -#new: 1 -#fail: true \ No newline at end of file diff --git a/tests/debug/debug_inherit.yaml b/tests/debug/debug_inherit.yaml deleted file mode 100644 index 81a96fd1..00000000 --- a/tests/debug/debug_inherit.yaml +++ /dev/null @@ -1,5 +0,0 @@ -#config: [more_inherit.yaml, most_inherit.yaml] -test: [1, 2] -Test2: - other: 2 - fail: 1 \ No newline at end of file diff --git a/tests/debug/more_inherit.yaml b/tests/debug/more_inherit.yaml deleted file mode 100644 index d054893b..00000000 --- a/tests/debug/more_inherit.yaml +++ /dev/null @@ -1,2 +0,0 @@ -#config: [debug.yaml] -foo: bar \ No newline at end of file diff --git a/tests/debug/most_inherit.yaml b/tests/debug/most_inherit.yaml deleted file mode 100644 index d9e276d6..00000000 --- a/tests/debug/most_inherit.yaml +++ /dev/null @@ -1,2 +0,0 @@ -#config: [more_inherit.yaml] -new: 15 \ No newline at end of file diff --git a/tests/debug/params/__init__.py b/tests/debug/params/__init__.py deleted file 
mode 100644 index 40a96afc..00000000 --- a/tests/debug/params/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/tests/debug/params/first.py b/tests/debug/params/first.py deleted file mode 100644 index b80f52a5..00000000 --- a/tests/debug/params/first.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- -from enum import Enum -from spock.config import spock -from typing import List -from typing import Tuple -from typing import Optional -from .second import Choice - -# -# class Choice(Enum): -# """Blah -# -# Attributes: -# pear: help pears -# banana: help bananas -# -# """ -# pear = 'pear' -# banana = 'banana' -# -# -# @spock -# class OtherStuff: -# """Other stuff class -# -# Attributes: -# three: heahadsf -# four: asdfjhasdlkf -# -# """ -# three: int -# four: str -# -# -# @spock -# class Stuff: -# """Stuff class -# -# Attributes: -# one: help -# two: teadsfa -# -# """ -# one: int -# two: str -# -# -# class ClassStuff(Enum): -# """Class enum -# -# Attributes: -# other_stuff: OtherStuff class -# stuff: Stuff class -# -# """ -# other_stuff = OtherStuff -# stuff = Stuff -# -# -@spock -class NestedListStuff: - """Class enum - - Attributes: - maybe: some val - more: some other value - - """ - maybe: int - more: str - - -@spock -class Test: - """High level docstring that just so happens to be multiline adfjads;lfja;sdlkjfklasjflkasjlkfjal;sdfjlkajsdfl;kja - adfasfdsafklasdjfkladsjklfasdjlkf - - Mid-level docstring - - Attributes: - fail: help me obi wan - test: you are my only hopes - most_broken: class stuff enum - new_choice: choice type optionality - one: just a basic parameter - nested_list: Repeated list of a class type - """ - # new_choice: Choice - # fail: Tuple[List[int], List[int]] - test: Optional[List[int]] - fail: List[List[int]] - # flipper: bool - # most_broken: ClassStuff - one: int - nested_list: List[NestedListStuff] - help: str - diff --git a/tests/debug/params/second.py b/tests/debug/params/second.py deleted file mode 
100644 index 8f9c5e5e..00000000 --- a/tests/debug/params/second.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- -from enum import Enum - - -class Choice(Enum): - """Blah - - Attributes: - pear: help pears - banana: help bananas - - """ - pear = 'pear' - banana = 'banana' \ No newline at end of file From 10d0ae791d3ac2d06c90bd81a5d1e785d8ce5db2 Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Wed, 5 May 2021 09:21:40 -0400 Subject: [PATCH 2/7] Moved s3 support to addons to prevent import at top level. Signed-off-by: Nicholas Cilfone --- spock/__init__.py | 3 +-- spock/{backend => addons}/s3/__init__.py | 0 spock/{backend => addons}/s3/utils.py | 1 - spock/backend/__init__.py | 2 +- spock/handlers.py | 2 +- 5 files changed, 3 insertions(+), 5 deletions(-) rename spock/{backend => addons}/s3/__init__.py (100%) rename spock/{backend => addons}/s3/utils.py (99%) diff --git a/spock/__init__.py b/spock/__init__.py index 9faa0ddf..0ea44bea 100644 --- a/spock/__init__.py +++ b/spock/__init__.py @@ -10,9 +10,8 @@ """ from spock._version import get_versions -from spock.backend.s3.utils import S3Config -__all__ = ["args", "builder", "config", "S3Config"] +__all__ = ["args", "builder", "config"] __version__ = get_versions()['version'] del get_versions \ No newline at end of file diff --git a/spock/backend/s3/__init__.py b/spock/addons/s3/__init__.py similarity index 100% rename from spock/backend/s3/__init__.py rename to spock/addons/s3/__init__.py diff --git a/spock/backend/s3/utils.py b/spock/addons/s3/utils.py similarity index 99% rename from spock/backend/s3/utils.py rename to spock/addons/s3/utils.py index 262c3d4b..bf1cf061 100644 --- a/spock/backend/s3/utils.py +++ b/spock/addons/s3/utils.py @@ -15,7 +15,6 @@ from hurry.filesize import size import os from urllib.parse import urlparse -import re import sys import typing diff --git a/spock/backend/__init__.py b/spock/backend/__init__.py index 7ec9249d..d8767c44 100644 --- a/spock/backend/__init__.py +++ 
b/spock/backend/__init__.py @@ -8,4 +8,4 @@ Please refer to the documentation provided in the README.md """ -__all__ = ["attr", "base", "s3"] +__all__ = ["attr", "base"] diff --git a/spock/handlers.py b/spock/handlers.py index 3d0d03dc..2c9b23ca 100644 --- a/spock/handlers.py +++ b/spock/handlers.py @@ -91,7 +91,7 @@ def _handle_possible_s3_load_path(path: str, s3_config=None) -> str: is_s3 = check_path_s3(path=path) if is_s3: try: - from spock.backend.s3.utils import handle_s3_load_path + from spock.addons.s3.utils import handle_s3_load_path path = handle_s3_load_path(path=path, s3_config=s3_config) except ImportError: print('Error importing s3 utils after detecting s3:// path') From 4a7f9a29664e5373f59149175ac37aeb0c8d3169 Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Wed, 5 May 2021 09:21:47 -0400 Subject: [PATCH 3/7] Moved s3 support to addons to prevent import at top level. Signed-off-by: Nicholas Cilfone --- spock/addons/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 spock/addons/__init__.py diff --git a/spock/addons/__init__.py b/spock/addons/__init__.py new file mode 100644 index 00000000..11ec2709 --- /dev/null +++ b/spock/addons/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 FMR LLC +# SPDX-License-Identifier: Apache-2.0 + +""" +Spock is a framework that helps manage complex parameter configurations for Python applications + +Please refer to the documentation provided in the README.md +""" +from spock.addons.s3.utils import S3Config + +__all__ = ["s3", "S3Config"] From 1e12b873ce25387d3c89c38c1a2dac9cf9fbd272 Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Thu, 6 May 2021 13:33:02 -0400 Subject: [PATCH 4/7] S3 save functionality Signed-off-by: Nicholas Cilfone --- setup.py | 6 ++- spock/addons/__init__.py | 4 +- spock/addons/s3/configs.py | 64 ++++++++++++++++++++++++ spock/addons/s3/utils.py | 81 +++++++++++++++++++------------ spock/backend/attr/typed.py | 2 +- 
spock/backend/base.py | 18 +++---- spock/handlers.py | 66 +++++++++++++++++++++---- spock/utils.py | 4 +- tests/attr_tests/test_all_attr.py | 21 +++++--- 9 files changed, 207 insertions(+), 59 deletions(-) create mode 100644 spock/addons/s3/configs.py diff --git a/setup.py b/setup.py index f6ef0a49..9345907e 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,8 @@ # -*- coding: utf-8 -*- + +# Copyright 2019 FMR LLC +# SPDX-License-Identifier: Apache-2.0 + """Spock Setup""" from pkg_resources import parse_requirements @@ -45,5 +49,5 @@ packages=setuptools.find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), python_requires='>=3.6', install_requires=install_reqs, - extras_require={'s3': ['boto3', 'botocore', 'hurry.filesize']} + extras_require={'s3': ['boto3', 'botocore', 's3transfer', 'hurry.filesize']} ) diff --git a/spock/addons/__init__.py b/spock/addons/__init__.py index 11ec2709..854473e1 100644 --- a/spock/addons/__init__.py +++ b/spock/addons/__init__.py @@ -9,5 +9,7 @@ Please refer to the documentation provided in the README.md """ from spock.addons.s3.utils import S3Config +from spock.addons.s3.configs import S3DownloadConfig +from spock.addons.s3.configs import S3UploadConfig -__all__ = ["s3", "S3Config"] +__all__ = ["s3", "S3Config", "S3DownloadConfig", "S3UploadConfig"] diff --git a/spock/addons/s3/configs.py b/spock/addons/s3/configs.py new file mode 100644 index 00000000..9a4f15b7 --- /dev/null +++ b/spock/addons/s3/configs.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 FMR LLC +# SPDX-License-Identifier: Apache-2.0 + +"""Handles all S3 related configurations""" + +import attr +try: + import boto3 + from botocore.client import BaseClient + from s3transfer.manager import TransferManager +except ImportError: + print('Missing libraries to support S3 functionality. 
Please re-install spock with the extra s3 dependencies -- ' + 'pip install spock-config[s3]') +import typing + + +# Iterate through the allowed download args for S3 and map into optional attr.ib +download_attrs = { + val: attr.ib( + default=None, + type=str, + validator=attr.validators.optional(attr.validators.instance_of(str)) + ) for val in TransferManager.ALLOWED_DOWNLOAD_ARGS} + + +# Make the class dynamically +S3DownloadConfig = attr.make_class(name="S3DownloadConfig", attrs=download_attrs, kw_only=True, frozen=True) + +# Iterate through the allowed upload args for S3 and map into optional attr.ib +upload_attrs = { + val: attr.ib( + default=None, + type=str, + validator=attr.validators.optional(attr.validators.instance_of(str)) + ) for val in TransferManager.ALLOWED_UPLOAD_ARGS +} + + +# Make the class dynamically +S3UploadConfig = attr.make_class(name="S3UploadConfig", attrs=upload_attrs, kw_only=True, frozen=True) + + +@attr.s(auto_attribs=True) +class S3Config: + """Configuration class for S3 support + + *Attributes*: + + session: instantiated boto3 session object + s3_session: automatically generated s3 client from the boto3 session + kms_arn: AWS KMS key ARN (optional) + temp_folder: temporary working folder to write/read spock configuration(s) (optional: defaults to /tmp) + + """ + session: boto3.Session + s3_session: BaseClient = attr.ib(init=False) + temp_folder: typing.Optional[str] = '/tmp/' + download_config: S3DownloadConfig = S3DownloadConfig() + upload_config: S3UploadConfig = S3UploadConfig() + + def __attrs_post_init__(self): + self.s3_session = self.session.client('s3') diff --git a/spock/addons/s3/utils.py b/spock/addons/s3/utils.py index bf1cf061..1f01efa1 100644 --- a/spock/addons/s3/utils.py +++ b/spock/addons/s3/utils.py @@ -15,31 +15,13 @@ from hurry.filesize import size import os from urllib.parse import urlparse +from spock.addons.s3.configs import S3Config +from spock.addons.s3.configs import S3DownloadConfig +from 
spock.addons.s3.configs import S3UploadConfig import sys import typing -@attr.s(auto_attribs=True) -class S3Config: - """Configuration class for S3 support - - *Attributes*: - - session: instantiated boto3 session object - s3_session: automatically generated s3 client from the boto3 session - kms_arn: AWS KMS key ARN (optional) - temp_folder: temporary working folder to write/read spock configuration(s) (optional: defaults to /tmp) - - """ - session: boto3.Session - s3_session: BaseClient = attr.ib(init=False) - kms_arn: typing.Optional[str] = None - temp_folder: typing.Optional[str] = '/tmp/' - - def __attrs_post_init__(self): - self.s3_session = self.session.client('s3') - - def handle_s3_load_path(path: str, s3_config: S3Config) -> str: """Handles loading from S3 uri @@ -57,16 +39,31 @@ def handle_s3_load_path(path: str, s3_config: S3Config) -> str: """ if s3_config is None: - raise ValueError('Missing S3Config object which is necessary to handle S3 style paths') + raise ValueError('Load from S3 -- Missing S3Config object which is necessary to handle S3 style paths') bucket, obj, fid = get_s3_bucket_object_name(s3_path=path) # Construct the full temp path temp_path = f'{s3_config.temp_folder}/{fid}' # Strip double slashes if exist temp_path = temp_path.replace(r'//', r'/') - temp_path = download_s3(bucket=bucket, obj=obj, temp_path=temp_path, s3_session=s3_config.s3_session) + temp_path = download_s3( + bucket=bucket, obj=obj, temp_path=temp_path, s3_session=s3_config.s3_session, + download_config=s3_config.download_config + ) return temp_path +def handle_s3_save_path(temp_path: str, s3_path: str, name: str, s3_config: S3Config): + if s3_config is None: + raise ValueError('Save to S3 -- Missing S3Config object which is necessary to handle S3 style paths') + # Fix posix strip + s3_path = s3_path.replace('s3:/', 's3://') + bucket, obj, fid = get_s3_bucket_object_name(f'{s3_path}/{name}') + upload_s3( + bucket=bucket, obj=obj, temp_path=temp_path, + 
s3_session=s3_config.s3_session, upload_config=s3_config.upload_config + ) + + def get_s3_bucket_object_name(s3_path: str) -> typing.Tuple[str, str, str]: """Splits a S3 uri into bucket, object, name @@ -85,7 +82,8 @@ def get_s3_bucket_object_name(s3_path: str) -> typing.Tuple[str, str, str]: return parsed.netloc, parsed.path.lstrip('/'), os.path.basename(parsed.path) -def download_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient) -> str: +def download_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient, + download_config: S3DownloadConfig) -> str: """Attempts to download the file from the S3 uri to a temp location *Args*: @@ -101,7 +99,9 @@ def download_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient) - """ try: - file_size = s3_session.head_object(Bucket=bucket, Key=obj)['ContentLength'] + # Unroll the extra options for those values that are not None + extra_options = {k: v for k, v in attr.asdict(download_config).items() if v is not None} + file_size = s3_session.head_object(Bucket=bucket, Key=obj, **extra_options)['ContentLength'] print(f'Attempting to download s3://{bucket}/{obj} (size: {size(file_size)})') current_progress = 0 n_ticks = 50 @@ -116,7 +116,7 @@ def _s3_progress_bar(chunk): sys.stdout.flush() sys.stdout.write('\n\n') # Download with the progress callback - s3_session.download_file(bucket, obj, temp_path, Callback=_s3_progress_bar) + s3_session.download_file(bucket, obj, temp_path, Callback=_s3_progress_bar, ExtraArgs=extra_options) return temp_path except IOError: print(f'Failed to download file from S3 ' @@ -124,7 +124,28 @@ def _s3_progress_bar(chunk): f'and write to {temp_path}') -def upload_s3(self): - # Here it should upload to S3 from the written path (/tmp?) - # How to manage KMS or if file is encrypted? Config obj? 
Would the session have it already - pass \ No newline at end of file +def upload_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient, + upload_config: S3UploadConfig): + try: + # Unroll the extra options for those values that are not None + extra_options = {k: v for k, v in attr.asdict(upload_config).items() if v is not None} + file_size = os.path.getsize(temp_path) + print(f'Attempting to upload s3://{bucket}/{obj} (size: {size(file_size)})') + current_progress = 0 + n_ticks = 50 + + def _s3_progress_bar(chunk): + nonlocal current_progress + # Increment progress + current_progress += chunk + done = int(n_ticks * (current_progress / file_size)) + sys.stdout.write(f"\r[%s%s] " + f"{int(current_progress/file_size) * 100}%%" % ('=' * done, ' ' * (n_ticks - done))) + sys.stdout.flush() + sys.stdout.write('\n\n') + # Upload with progress callback + s3_session.upload_file(temp_path, bucket, obj, Callback=_s3_progress_bar, ExtraArgs=extra_options) + except IOError: + print(f'Failed to upload file to S3 ' + f'(bucket: {bucket}, object: {obj}) ' + f'from {temp_path}') diff --git a/spock/backend/attr/typed.py b/spock/backend/attr/typed.py index fbc14b66..8452e111 100644 --- a/spock/backend/attr/typed.py +++ b/spock/backend/attr/typed.py @@ -306,7 +306,7 @@ def _type_katra(typed, default=None, optional=False): elif isinstance(typed, _GenericAlias): name = _get_name_py_version(typed=typed) else: - raise TypeError('Encountered an uxpected type in _type_katra') + raise TypeError('Encountered an unexpected type in _type_katra') special_key = None # Default booleans to false and optional due to the nature of a boolean if isinstance(typed, type) and name == "bool": diff --git a/spock/backend/base.py b/spock/backend/base.py index 64ff822e..a6bd6364 100644 --- a/spock/backend/base.py +++ b/spock/backend/base.py @@ -97,21 +97,21 @@ def save(self, payload, path, file_name=None, create_save_path=False, extra_info """ # Check extension 
self._check_extension(file_extension=file_extension) - # Make the filename - fname = str(uuid1()) if file_name is None else file_name - name = f'{fname}.spock.cfg{file_extension}' - fid = path / name + # Make the filename -- always append a uuid for unique-ness + uuid_str = str(uuid1()) + fname = '' if file_name is None else f'{file_name}.' + name = f'{fname}{uuid_str}.spock.cfg{file_extension}' # Fix up values -- parameters out_dict = self._clean_up_values(payload, file_extension) # Get extra info extra_dict = add_info() if extra_info else None try: - if not os.path.exists(path) and create_save_path: - os.makedirs(path) - with open(fid, 'w') as file_out: - self._supported_extensions.get(file_extension)().save(out_dict, extra_dict, file_out) + self._supported_extensions.get(file_extension)().save( + out_dict=out_dict, info_dict=extra_dict, path=str(path), name=name, + create_path=create_save_path, s3_config=self._s3_config + ) except OSError as e: - print(f'Not a valid file path to write to: {fid}') + print(f'Unable to write to given path: {path / name}') raise e @abstractmethod diff --git a/spock/handlers.py b/spock/handlers.py index 2c9b23ca..4567cd8d 100644 --- a/spock/handlers.py +++ b/spock/handlers.py @@ -8,6 +8,7 @@ from abc import ABC from abc import abstractmethod import json +import os import re from spock import __version__ from spock.utils import check_path_s3 @@ -56,8 +57,35 @@ def _load(self, path: str) -> typing.Dict: """ raise NotImplementedError + def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, name: str, + create_path: bool = False, s3_config=None): + """Write function for file type + + *Args*: + + out_dict: payload to write + info_dict: info payload to write + path: path to write out + create_path: boolean to create the path if non-existent (for non S3) + s3_config: optional s3 config object if using s3 storage + + *Returns*: + + """ + write_path, is_s3 = self._handle_possible_s3_save_path( + path=path, 
name=name, create_path=create_path, s3_config=s3_config + ) + write_path = self._save(out_dict=out_dict, info_dict=info_dict, path=write_path) + # After write check if it needs to be pushed to S3 + if is_s3: + try: + from spock.addons.s3.utils import handle_s3_save_path + handle_s3_save_path(temp_path=write_path, s3_path=path, name=name, s3_config=s3_config) + except ImportError: + print('Error importing spock s3 utils after detecting s3:// save path') + @abstractmethod - def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): + def _save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str) -> str: """Write function for file type *Args*: @@ -94,9 +122,26 @@ def _handle_possible_s3_load_path(path: str, s3_config=None) -> str: from spock.addons.s3.utils import handle_s3_load_path path = handle_s3_load_path(path=path, s3_config=s3_config) except ImportError: - print('Error importing s3 utils after detecting s3:// path') + print('Error importing spock s3 utils after detecting s3:// load path') return path + @staticmethod + def _handle_possible_s3_save_path(path: str, name: str, create_path: bool, + s3_config=None) -> typing.Tuple[str, bool]: + # Posix path will strip multiple forward slashes so map back + # path = path.replace('s3:/', 's3://') + is_s3 = check_path_s3(path=path) + if is_s3: + write_path = f'{s3_config.temp_folder}/{name}' + # Strip double slashes if exist + write_path = write_path.replace(r'//', r'/') + else: + # Handle the path logic for non S3 + if not os.path.exists(path) and create_path: + os.makedirs(path) + write_path = f'{path}/{name}' + return write_path, is_s3 + @staticmethod def write_extra_info(path, info_dict): """Writes extra info to commented newlines @@ -110,7 +155,7 @@ def write_extra_info(path, info_dict): """ # Write the commented info as new lines - with open(path.name, 'w+') as fid: + with open(path, 'w+') as fid: # Write a spock header fid.write(f'# Spock 
Version: {__version__}\n') # Write info dict if not None @@ -158,7 +203,7 @@ def _load(self, path: str) -> typing.Dict: base_payload = yaml.safe_load(file_contents) return base_payload - def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): + def _save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str): """Write function for YAML type *Args*: @@ -174,8 +219,9 @@ def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], p self.write_extra_info(path=path, info_dict=info_dict) # Remove aliases in YAML dump yaml.Dumper.ignore_aliases = lambda *args: True - with open(path.name, 'a') as yaml_fid: + with open(path, 'a') as yaml_fid: yaml.safe_dump(out_dict, yaml_fid, default_flow_style=False) + return path class TOMLHandler(Handler): @@ -199,7 +245,7 @@ def _load(self, path: str) -> typing.Dict: base_payload = toml.load(path) return base_payload - def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): + def _save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str): """Write function for TOML type *Args*: @@ -213,8 +259,9 @@ def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], p """ # First write the commented info self.write_extra_info(path=path, info_dict=info_dict) - with open(path.name, 'a') as toml_fid: + with open(path, 'a') as toml_fid: toml.dump(out_dict, toml_fid) + return path class JSONHandler(Handler): @@ -239,7 +286,7 @@ def _load(self, path: str) -> typing.Dict: base_payload = json.load(json_fid) return base_payload - def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str, s3_config=None): + def _save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], path: str): """Write function for JSON type *Args*: @@ -254,5 +301,6 @@ def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], 
p if info_dict is not None: warn('JSON does not support comments and thus cannot save extra info to file... removing extra info') info_dict = None - with open(path.name, 'a') as json_fid: + with open(path, 'a') as json_fid: json.dump(out_dict, json_fid, indent=4, separators=(',', ': ')) + return path diff --git a/spock/utils.py b/spock/utils.py index aae66403..282430a5 100644 --- a/spock/utils.py +++ b/spock/utils.py @@ -36,8 +36,8 @@ def check_path_s3(path: str) -> bool: boolean of regex match """ - # Make a case insensitive s3 regex - s3_regex = re.compile(r'(?i)^s3://').search(path) + # Make a case insensitive s3 regex with single or double forward slash (due to posix stripping) + s3_regex = re.compile(r'(?i)^s3://?').search(path) # If it returns an object then the path is an s3 style reference return s3_regex is not None diff --git a/tests/attr_tests/test_all_attr.py b/tests/attr_tests/test_all_attr.py index bcc8d1f4..fc951a78 100644 --- a/tests/attr_tests/test_all_attr.py +++ b/tests/attr_tests/test_all_attr.py @@ -4,12 +4,14 @@ # SPDX-License-Identifier: Apache-2.0 from attr.exceptions import FrozenInstanceError +import datetime import glob import os import pytest from spock.builder import ConfigArgBuilder from spock.config import isinstance_spock from tests.attr_tests.attr_configs_test import * +import re import sys @@ -605,15 +607,22 @@ def test_yaml_file_writer_save_path(self, monkeypatch): './tests/conf/yaml/test_save_path.yaml']) config = ConfigArgBuilder(TypeConfig, NestedStuff, NestedListStuff, TypeOptConfig, desc='Test Builder') # Test the chained version - config_values = config.save(file_extension='.yaml', file_name='pytest').generate() - check_path = f'{str(config_values.TypeConfig.save_path)}/pytest.spock.cfg.yaml' - fname = glob.glob(check_path)[0] + now = datetime.datetime.now() + curr_int_time = int(f'{now.year}{now.month}{now.day}{now.hour}{now.second}') + + config_values = config.save(file_extension='.yaml', 
file_name=f'pytest.{curr_int_time}').generate() + yaml_regex = re.compile(fr'pytest.{curr_int_time}.' + fr'[a-fA-F0-9]{{8}}-[a-fA-F0-9]{{4}}-[a-fA-F0-9]{{4}}-' + fr'[a-fA-F0-9]{{4}}-[a-fA-F0-9]{{12}}.spock.cfg.yaml') + matches = [re.fullmatch(yaml_regex, val) for val in os.listdir(str(config_values.TypeConfig.save_path)) + if re.fullmatch(yaml_regex, val) is not None] + fname = f'{str(config_values.TypeConfig.save_path)}/{matches[0].string}' with open(fname, 'r') as fin: print(fin.read()) - assert os.path.exists(check_path) + assert os.path.exists(fname) # Clean up if assert is good - if os.path.exists(check_path): - os.remove(check_path) + if os.path.exists(fname): + os.remove(fname) class TestYAMLWriterNoPath: From 12149c7e09f68efcc2379b4176ea82c0011967fc Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Thu, 6 May 2021 14:09:58 -0400 Subject: [PATCH 5/7] docstrings Signed-off-by: Nicholas Cilfone --- spock/addons/s3/utils.py | 29 ++++++++++++++++++++++++++++- spock/handlers.py | 21 ++++++++++++++++++--- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/spock/addons/s3/utils.py b/spock/addons/s3/utils.py index 1f01efa1..5ea03ffc 100644 --- a/spock/addons/s3/utils.py +++ b/spock/addons/s3/utils.py @@ -53,6 +53,20 @@ def handle_s3_load_path(path: str, s3_config: S3Config) -> str: def handle_s3_save_path(temp_path: str, s3_path: str, name: str, s3_config: S3Config): + """Handles saving to S3 uri + + Points to the local spock configuration file and handles getting it up to S3 + + *Args*: + + temp_path: the temporary path the spock config was written out to locally + s3_path: base s3 uri + name: spock generated filename + s3_config: s3_config object + + Returns: + + """ if s3_config is None: raise ValueError('Save to S3 -- Missing S3Config object which is necessary to handle S3 style paths') # Fix posix strip @@ -84,7 +98,7 @@ def get_s3_bucket_object_name(s3_path: str) -> typing.Tuple[str, str, str]: def download_s3(bucket: str, obj: str, temp_path: 
str, s3_session: BaseClient, download_config: S3DownloadConfig) -> str: - """Attempts to download the file from the S3 uri to a temp location + """Attempts to download the file from the S3 uri to a temp location using any extra arguments to the download *Args*: @@ -92,6 +106,7 @@ def download_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient, obj: s3 object temp_path: local temporary path to write file s3_session: current s3 session + download_config: S3DownloadConfig with extra options for the file transfer *Returns*: @@ -126,6 +141,18 @@ def _s3_progress_bar(chunk): def upload_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient, upload_config: S3UploadConfig): + """Attempts to upload the local file to the S3 uri using any extra arguments to the upload + + Args: + bucket: s3 bucket + obj: s3 object + temp_path: temporary path of the config file + s3_session: current s3 session + upload_config: S3UploadConfig with extra options for the file transfer + + Returns: + + """ try: # Unroll the extra options for those values that are not None extra_options = {k: v for k, v in attr.asdict(upload_config).items() if v is not None} diff --git a/spock/handlers.py b/spock/handlers.py index 4567cd8d..255bbd87 100644 --- a/spock/handlers.py +++ b/spock/handlers.py @@ -61,11 +61,15 @@ def save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], p create_path: bool = False, s3_config=None): """Write function for file type + This will handle local or s3 writes with the boolean is_s3 flag. 
If detected it will conditionally import + the necessary addons to handle the upload + *Args*: out_dict: payload to write info_dict: info payload to write path: path to write out + name: spock generated file name create_path: boolean to create the path if non-existent (for non S3) s3_config: optional s3 config object if using s3 storage @@ -101,7 +105,7 @@ def _save(self, out_dict: typing.Dict, info_dict: typing.Optional[typing.Dict], @staticmethod def _handle_possible_s3_load_path(path: str, s3_config=None) -> str: - """Handles the possibility of having to handle a S3 file + """Handles the possibility of having to handle loading from a S3 path Checks to see if it detects a S3 uri and if so triggers imports of s3 functionality and handles the file download @@ -128,8 +132,19 @@ def _handle_possible_s3_load_path(path: str, s3_config=None) -> str: @staticmethod def _handle_possible_s3_save_path(path: str, name: str, create_path: bool, s3_config=None) -> typing.Tuple[str, bool]: - # Posix path will strip multiple forward slashes so map back - # path = path.replace('s3:/', 's3://') + """Handles the possibility of having to save to a S3 path + + Checks to see if it detects a S3 uri and if so generates a tmp location to write the file to pre-upload + + Args: + path: save path + name: spock generated file name + create_path: create the path for non s3 data + s3_config: s3 config object + + Returns: + + """ is_s3 = check_path_s3(path=path) if is_s3: write_path = f'{s3_config.temp_folder}/{name}' From 8fe6c9f2d525c9e44b84246f9a6e2a9c3e190062 Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Thu, 6 May 2021 14:10:50 -0400 Subject: [PATCH 6/7] docstrings Signed-off-by: Nicholas Cilfone --- spock/addons/s3/utils.py | 7 ++++--- spock/handlers.py | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/spock/addons/s3/utils.py b/spock/addons/s3/utils.py index 5ea03ffc..b463f57b 100644 --- a/spock/addons/s3/utils.py +++ b/spock/addons/s3/utils.py @@ -64,7 +64,7 
@@ def handle_s3_save_path(temp_path: str, s3_path: str, name: str, s3_config: S3Co name: spock generated filename s3_config: s3_config object - Returns: + *Returns*: """ if s3_config is None: @@ -143,14 +143,15 @@ def upload_s3(bucket: str, obj: str, temp_path: str, s3_session: BaseClient, upload_config: S3UploadConfig): """Attempts to upload the local file to the S3 uri using any extra arguments to the upload - Args: + *Args*: + bucket: s3 bucket obj: s3 object temp_path: temporary path of the config file s3_session: current s3 session upload_config: S3UploadConfig with extra options for the file transfer - Returns: + *Returns*: """ try: diff --git a/spock/handlers.py b/spock/handlers.py index 255bbd87..96efb05f 100644 --- a/spock/handlers.py +++ b/spock/handlers.py @@ -136,13 +136,14 @@ def _handle_possible_s3_save_path(path: str, name: str, create_path: bool, Checks to see if it detects a S3 uri and if so generates a tmp location to write the file to pre-upload - Args: + *Args*: + path: save path name: spock generated file name create_path: create the path for non s3 data s3_config: s3 config object - Returns: + *Returns*: """ is_s3 = check_path_s3(path=path) From 4f496d646eb2a8e3a251a258c01620880be114ef Mon Sep 17 00:00:00 2001 From: Nicholas Cilfone Date: Thu, 6 May 2021 16:38:33 -0400 Subject: [PATCH 7/7] Added S3 docs Signed-off-by: Nicholas Cilfone --- README.md | 16 +++--- docs/addons/S3.md | 130 ++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 5 ++ 3 files changed, 143 insertions(+), 8 deletions(-) create mode 100644 docs/addons/S3.md diff --git a/README.md b/README.md index 33799a77..8901f36c 100644 --- a/README.md +++ b/README.md @@ -39,18 +39,16 @@ recent features, bugfixes, and hotfixes. See [Releases](https://github.com/fidelity/spock/releases) for more information. 
-#### March 18th, 2021 +#### May 6th, 2021 +* Added S3 support with `pip install spock-config[s3]` +* S3 addon supports automatically handling loading/saving from paths defined with `s3://` URI(s) by passing in an +active `boto3.Session` + +#### March 18th, 2021 * Support for Google docstring style annotation of `spock` class (and Enums) and attributes * Added in ability to print docstring annotated help information to command line with `--help` argument -#### March 1st, 2021 - -* Removed legacy backend and API (dataclasses and custom typed interface) -* Updated markdown save call to support advanced types so that saved configurations are now valid `spock` config - input files -* Changed tuples to support length restrictions - ## Documentation Current documentation and more information can be found [here](https://fidelity.github.io/spock/). @@ -72,6 +70,8 @@ set of parameters. * Tractability and Reproducibility: Save runtime parameter configuration to YAML, TOML, or JSON with a single chained command (with extra runtime info such as Git info, Python version, machine FQDN, etc). The saved markdown file can be used as the configuration input to reproduce prior runtime configurations. +* S3 Addon: Automatically detects `s3://` URI(s) and handles loading and saving `spock` configuration files when an + active `boto3.Session` is passed in (plus any additional `S3Transfer` configurations) #### Main Contributors diff --git a/docs/addons/S3.md b/docs/addons/S3.md new file mode 100644 index 00000000..d2ac5d32 --- /dev/null +++ b/docs/addons/S3.md @@ -0,0 +1,130 @@ +# S3 Support + +When installed with the S3 addon `spock` will attempt to identify S3 URI(s) (e.g. `s3://<bucket>/<path>`) and handle +them automatically. The user only needs to provide an active `boto3.session.Session` to an `S3Config` object and pass +it to the `ConfigArgBuilder`.
+ +```bash +pip install spock-config[s3] +``` + +### Creating a boto3 Session + +The user must provide an active `boto3.session.Session` object to `spock` in order for the library to automatically +handle S3 URI(s). Configuration is **highly dependent** upon your current AWS setup/security. Please refer to the +`boto3` docs for [session](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html) and +[credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) for help on how to +correctly configure your `boto3.session.Session`. + +For instance, let's just suppose we are going to get our tokens via SAML authorization +where we already have the SAMLAssertion, RoleArn, and PrincipalArn stored as env variables: + +```python +import boto3 +import os + +client = boto3.client('sts') + +token = client.assume_role_with_saml( + RoleArn=os.environ.get("RoleArn"), PrincipalArn=os.environ.get("PrincipalArn"), + SAMLAssertion=os.environ.get("SamlString") +) + +credentials = token['Credentials'] + +session = boto3.Session( + aws_access_key_id=credentials['AccessKeyId'], + aws_secret_access_key=credentials['SecretAccessKey'], + aws_session_token=credentials['SessionToken'], + region_name=os.environ.get('AWS_REGION')) +``` + +### Using the S3Config Object + +As an example let's create a basic `@spock` decorated class, instantiate a `S3Config` object from `spock.addons` with +the `boto3.session.Session` we created above, and pass it to the `ConfigArgBuilder`. 
+ +```python +from spock.addons import S3Config +from spock.builder import ConfigArgBuilder +from spock.config import spock +from typing import List + +@spock +class BasicConfig: + """Basic spock configuration for example purposes + + Attributes: + parameter: simple boolean that flags rounding + fancy_parameter: parameter that multiplies a value + fancier_parameter: parameter that gets added to product of val and fancy_parameter + most_fancy_parameter: values to apply basic algebra to + + """ + parameter: bool + fancy_parameter: float + fancier_parameter: float + most_fancy_parameter: List[int] + +def main(): + # Create an S3Config object and pass in the boto3 session + s3_config = S3Config( + session=session + ) + # Chain the generate function to the ConfigArgBuilder call + # Pass in the S3Config object + config = ConfigArgBuilder( + BasicConfig, + desc='S3 example', + s3_config=s3_config + ).generate() +``` + +### Defining the configuration file with a S3 URI + +Usually we pass a relative or absolute system path as the configuration file command line argument. Here we pass +in a S3 URI instead: + +```bash +$ python simple.py -c s3://my-bucket/path/to/file/config.yaml +``` + +With a `S3Config` object passed into the `ConfigArgBuilder` the S3 URI will automatically be handled by `spock`. + +### Saving to a S3 URI + +Similarly, we usually pass a relative or absolute system path to the `SavePath` special argument type or +to the `user_specified_path` kwarg. 
Again, instead we give a S3 URI: + +```python +def main(): + # Create an S3Config object and pass in the boto3 session + s3_config = S3Config( + session=session + ) + # Chain the generate function to the ConfigArgBuilder call + # Pass in the S3Config object + config = ConfigArgBuilder( + BasicConfig, + desc='S3 example', + s3_config=s3_config + ).save(user_specified_path="s3://my-bucket/path/to/file/").generate() +``` + +With a `S3Config` object passed into the `ConfigArgBuilder` the S3 URI will automatically be handled by `spock`. + +### S3Transfer ExtraArgs + +If you require any other settings for uploading or downloading files from S3 the `S3Config` class has two extra +attributes: + +`download_config` which takes a `S3DownloadConfig` object from `spock.addons` which supports all ExtraArgs from +[S3Transfer.ALLOWED_DOWNLOAD_ARGS](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_DOWNLOAD_ARGS) + +`upload_config` which takes a `S3UploadConfig` object from `spock.addons` which supports all ExtraArgs from +[S3Transfer.ALLOWED_UPLOAD_ARGS](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS) diff --git a/pyproject.toml b/pyproject.toml index 7fceba8e..c7341354 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,5 +60,10 @@ Motivation = "docs/Motivation.md" [[tool.portray.mkdocs.nav."Advanced Features"]] "Command Line Overrides" = "docs/advanced_features/Command-Line-Overrides.md" +# Addons +[[tool.portray.mkdocs.nav]] + [[tool.portray.mkdocs.nav."Addons"]] + "S3" = "docs/addons/S3.md" + [[tool.portray.mkdocs.nav]] Contributing = "CONTRIBUTING.md"