
Commit

Merge 2894ca8 into 3feead0
abegong committed Aug 18, 2019
2 parents 3feead0 + 2894ca8 commit d5ff126
Showing 13 changed files with 408 additions and 288 deletions.
270 changes: 121 additions & 149 deletions great_expectations/data_context/data_context.py

Large diffs are not rendered by default.

84 changes: 46 additions & 38 deletions great_expectations/data_context/store/store.py
@@ -1,38 +1,35 @@
-import logging
-logger = logging.getLogger(__name__)
-
-import json
-import os
-import io
-import six
-
-import pandas as pd
-
-from ..util import safe_mmkdir
-
-from ..types import (
-    NameSpaceDotDict,
-)
from .types import (
    InMemoryStoreConfig,
    FilesystemStoreConfig,
)
+from ..types import (
+    NameSpaceDotDict,
+)
+from ..util import safe_mmkdir
+import pandas as pd
+import six
+import io
+import os
+import json
+import logging
+logger = logging.getLogger(__name__)


class ContextAwareStore(object):
def __init__(
self,
data_context,
config,
):
-        #FIXME: Eek. This causes circular imports. What to do?
-        #TODO: remove the dependency. Stores should be based on namespaceIdentifier objects, but not Context itself.
+        # FIXME: Eek. This causes circular imports. What to do?
+        # TODO: remove the dependency. Stores should be based on namespaceIdentifier objects, but not Context itself.
# if not isinstance(data_context, DataContext):
# raise TypeError("data_context must be an instance of type DataContext")

self.data_context = data_context

if not isinstance(config, self.get_config_class()):
-            #Attempt to coerce config to a typed config
+            # Attempt to coerce config to a typed config
config = self.get_config_class()(
coerce_types=True,
**config
@@ -47,29 +44,33 @@ def get(self, key, serialization_type=None):
value = self._get(namespaced_key)

if serialization_type:
-            deserialization_method = self._get_deserialization_method(serialization_type)
+            deserialization_method = self._get_deserialization_method(
+                serialization_type)
        else:
-            deserialization_method = self._get_deserialization_method(self.config.serialization_type)
+            deserialization_method = self._get_deserialization_method(
+                self.config.serialization_type)
deserialized_value = deserialization_method(value)
return deserialized_value

def set(self, key, value, serialization_type=None):
namespaced_key = self._get_namespaced_key(key)

if serialization_type:
-            serialization_method = self._get_serialization_method(serialization_type)
+            serialization_method = self._get_serialization_method(
+                serialization_type)
        else:
-            serialization_method = self._get_serialization_method(self.config.serialization_type)

+            serialization_method = self._get_serialization_method(
+                self.config.serialization_type)

serialized_value = serialization_method(value)
self._set(namespaced_key, serialized_value)

@classmethod
def get_config_class(cls):
return cls.config_class

def _get_namespaced_key(self, key):
-        #TODO: This method is a placeholder until we bring in _get_namespaced_key from NameSpacedFilesystemStore
+        # TODO: This method is a placeholder until we bring in _get_namespaced_key from NameSpacedFilesystemStore
return key

def _get_serialization_method(self, serialization_type):
@@ -85,12 +86,12 @@ def convert_to_csv(df):
logger.debug("Starting convert_to_csv")

assert isinstance(df, pd.DataFrame)

return df.to_csv(index=None)

return convert_to_csv

-        #TODO: Add more serialization methods as needed
+        # TODO: Add more serialization methods as needed

def _get_deserialization_method(self, serialization_type):
if serialization_type == None:
@@ -100,20 +101,23 @@ def _get_deserialization_method(self, serialization_type):
return json.loads

elif serialization_type == "pandas_csv":
-            #TODO:
+            # TODO:
raise NotImplementedError

-        #TODO: Add more serialization methods as needed
+        # TODO: Add more serialization methods as needed

def _get(self, key):
raise NotImplementedError

def _set(self, key, value):
raise NotImplementedError

def list_keys(self):
raise NotImplementedError

def has_key(self, key):
raise NotImplementedError


class InMemoryStore(ContextAwareStore):
"""Uses an in-memory dictionary as a store.
@@ -129,10 +133,13 @@ def _get(self, key):

def _set(self, key, value):
self.store[key] = value

def list_keys(self):
return self.store.keys()

def has_key(self, key):
return key in self.store


class FilesystemStore(ContextAwareStore):
"""Uses a local filepath as a store.
@@ -185,8 +192,8 @@ def _setup(self):

safe_mmkdir(str(os.path.dirname(self.full_base_directory)))

-    #TODO: This method should probably live in ContextAwareStore
-    #For the moment, I'm leaving it here, because:
+    # TODO: This method should probably live in ContextAwareStore
+    # For the moment, I'm leaving it here, because:
# 1. This method and NameSpaceDotDict isn't yet general enough to handle all permutations of namespace objects
# 2. Rewriting all the tests in test_store is more work than I can take on right now.
# 3. Probably the best thing to do is to test BOTH classes that take simple strings as keys, and classes that take full NameSpaceDotDicts. But that relies on (1).
@@ -196,7 +203,8 @@ def _setup(self):

def _get_namespaced_key(self, key):
if not isinstance(key, NameSpaceDotDict):
-            raise TypeError("key must be an instance of type NameSpaceDotDict, not {0}".format(type(key)))
+            raise TypeError(
+                "key must be an instance of type NameSpaceDotDict, not {0}".format(type(key)))

filepath = self.data_context._get_normalized_data_asset_name_filepath(
key.normalized_data_asset_name,
@@ -208,7 +216,7 @@ def _get_namespaced_key(self, key):
file_extension=self.config.file_extension
)
return filepath

def get_most_recent_run_id(self):
run_id_list = os.listdir(self.full_base_directory)

@@ -231,8 +239,8 @@ def get_most_recent_run_id(self):
# def _set(self, key, value):
# raise NotImplementedError

-#This code is from an earlier (untested) implementation of DataContext.register_validation_results
-#Storing it here in case it can be salvaged
+# This code is from an earlier (untested) implementation of DataContext.register_validation_results
+# Storing it here in case it can be salvaged
# if isinstance(data_asset_snapshot_store, dict) and "s3" in data_asset_snapshot_store:
# bucket = data_asset_snapshot_store["s3"]["bucket"]
# key_prefix = data_asset_snapshot_store["s3"]["key_prefix"]
@@ -260,4 +268,4 @@ def get_most_recent_run_id(self):
# except ImportError:
# logger.error("Error importing boto3 for AWS support. Unable to save to result store.")
# except Exception:
# raise
# raise
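
For orientation only (not part of the diff): a minimal sketch of how the reorganized store API above might be exercised. It assumes the import path shown, that InMemoryStore accepts a plain config dict with a serialization_type field (coerced to InMemoryStoreConfig in __init__), that data_context=None is acceptable for in-memory use, and that the "json" serialization method pairs json.dumps with the json.loads shown above.

# Hypothetical usage sketch based on ContextAwareStore/InMemoryStore above; names and
# construction details outside the diff are assumptions.
from great_expectations.data_context.store.store import InMemoryStore

store = InMemoryStore(
    data_context=None,                      # assumption: no real DataContext needed for in-memory use
    config={"serialization_type": "json"},  # coerced to InMemoryStoreConfig by __init__
)

store.set("validation_result", {"success": True})   # serialized via the "json" method
value = store.get("validation_result")              # deserialized via json.loads
assert value == {"success": True}
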
2 changes: 2 additions & 0 deletions great_expectations/data_context/templates.py
@@ -21,6 +21,8 @@
# and any configured evaluation parameter store
plugins_directory: plugins/
expectations_directory: expectations/
+evaluation_parameter_store_name: evaluation_parameter_store
# Configure additional data context options here.
56 changes: 0 additions & 56 deletions great_expectations/data_context/types.py

This file was deleted.

39 changes: 38 additions & 1 deletion great_expectations/data_context/types/__init__.py
@@ -1 +1,38 @@
-from .configurations import ClassConfig
+from ...types import LooselyTypedDotDict
+from six import string_types
+from collections import namedtuple
+from six import string_types
+
+from .configurations import (
+    Config,
+    ClassConfig,
+    DataContextConfig,
+)
+
+NormalizedDataAssetName = namedtuple("NormalizedDataAssetName", [
+    "datasource",
+    "generator",
+    "generator_asset"
+])
+
+
+class NameSpaceDotDict(LooselyTypedDotDict):
+    # Note: As of 2018/8/14, this class is VERY MUCH a work in progress
+    # It should almost certainly be a namedtuple, to keep it immutable.
+    _allowed_keys = set([
+        "normalized_data_asset_name",
+        "expectation_suite_name",
+        "run_id",
+    ])
+
+    _required_keys = set([
+        "normalized_data_asset_name",
+        "expectation_suite_name",
+        "run_id",
+    ])
+
+    _key_types = {
+        "normalized_data_asset_name": NormalizedDataAssetName,
+        "expectation_suite_name": string_types,
+        "run_id": string_types,
+    }
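
For orientation only (not part of the diff): a sketch of how the key types defined above fit together, assuming LooselyTypedDotDict subclasses accept keyword-style initialization; the datasource, generator, suite, and run names are invented.

# Hypothetical example of building a namespaced key from the types above.
from great_expectations.data_context.types import (
    NameSpaceDotDict,
    NormalizedDataAssetName,
)

asset_name = NormalizedDataAssetName(
    datasource="my_postgres_db",      # invented datasource name
    generator="default",              # invented generator name
    generator_asset="public.orders",  # invented asset name
)

key = NameSpaceDotDict(
    normalized_data_asset_name=asset_name,
    expectation_suite_name="warning",
    run_id="2019-08-18T120000.000000Z",
)
# A key shaped like this is what FilesystemStore._get_namespaced_key (in store.py above) expects.
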
37 changes: 36 additions & 1 deletion great_expectations/data_context/types/configurations.py
@@ -1,10 +1,16 @@
+from six import string_types
+
from ruamel.yaml import YAML, yaml_object
from great_expectations.types import LooselyTypedDotDict
yaml = YAML()


+class Config(LooselyTypedDotDict):
+    pass
+
+
@yaml_object(yaml)
-class ClassConfig(LooselyTypedDotDict):
+class ClassConfig(Config):
    _allowed_keys = {
        "module_name",
        "class_name"
@@ -16,3 +22,32 @@ class ClassConfig(LooselyTypedDotDict):
"module_name": str,
"class_name": str
}


class DataContextConfig(Config):
_allowed_keys = set([
"plugins_directory",
"expectations_directory",
"evaluation_parameter_store_name",
"datasources",
"stores",
"data_docs", # TODO: Rename this to sites, to remove a layer of extraneous nesting
])

_required_keys = set([
"plugins_directory",
"expectations_directory",
"evaluation_parameter_store_name",
"datasources",
"stores",
"data_docs",
])

_key_types = {
"plugins_directory": string_types,
"expectations_directory": string_types,
"evaluation_parameter_store_name": string_types,
"datasources": dict,
"stores": dict,
"data_docs": dict,
}
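
For orientation only (not part of the diff): the shape of dictionary the new DataContextConfig requires, per _required_keys and _key_types above. Keyword initialization is assumed; the directory and store-name values mirror the great_expectations.yml template earlier in this commit, and the empty dicts are placeholders.

# Hypothetical construction of the new typed project config.
from great_expectations.data_context.types.configurations import DataContextConfig

project_config = DataContextConfig(
    plugins_directory="plugins/",
    expectations_directory="expectations/",
    evaluation_parameter_store_name="evaluation_parameter_store",
    datasources={},   # datasource configs keyed by name
    stores={},        # store configs keyed by name
    data_docs={},     # slated to be renamed to "sites" per the TODO above
)
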
17 changes: 11 additions & 6 deletions great_expectations/datasource/datasource.py
@@ -173,12 +173,17 @@ def save_config(self):
Returns:
None
"""
-        if self._data_context is not None:
-            self._data_context._save_project_config()
-        else:
-            config_filepath = "great_expectations.yml"
-            with open(config_filepath, 'w') as config_file:
-                yaml.dump(self._datasource_config, config_file)
+
+        #FIXME: Not clear if deprecating this causes any breakages elsewhere...
+
+        #Yikes. This is not the datasource's job:
+        # if self._data_context is not None:
+        #     self._data_context._save_project_config()
+        # else:
+        #     config_filepath = "great_expectations.yml"
+        #     with open(config_filepath, 'w') as config_file:
+        #         yaml.dump(self._datasource_config, config_file)


# if self._data_context is not None:
# base_config = copy.deepcopy(self._datasource_config)
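
For orientation only (not part of the diff): with Datasource.save_config reduced to a no-op, persisting project configuration is the DataContext's job, as the commented-out code above indicates. A rough caller-side sketch, assuming a DataContext rooted at a standard great_expectations/ directory:

# Hypothetical caller-side sketch; the DataContext construction details are assumptions.
from great_expectations.data_context import DataContext

context = DataContext("great_expectations/")
context._save_project_config()   # private method referenced in the commented-out block above
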
