-
Notifications
You must be signed in to change notification settings - Fork 258
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Restructure code to encapsulate the `save_to_feature_config_from_context`
* Update client.py
* fix merge issues
* Update config_helper.py
* fix comments
- Loading branch information
1 parent
c21d89d
commit 799fac0
Showing
5 changed files
with
231 additions
and
408 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
from feathr.definition.dtype import * | ||
from feathr.registry.registry_utils import * | ||
from feathr.utils._file_utils import write_to_file | ||
from feathr.definition.anchor import FeatureAnchor | ||
from feathr.constants import * | ||
from feathr.definition.feature import Feature, FeatureType,FeatureBase | ||
from feathr.definition.feature_derivations import DerivedFeature | ||
from feathr.definition.repo_definitions import RepoDefinitions | ||
from feathr.definition.source import HdfsSource, InputContext, JdbcSource, Source | ||
from feathr.definition.transformation import (ExpressionTransformation, Transformation, | ||
WindowAggTransformation) | ||
from feathr.definition.typed_key import TypedKey | ||
from feathr.registry.feature_registry import FeathrRegistry | ||
from feathr.definition.repo_definitions import RepoDefinitions | ||
from pathlib import Path | ||
from jinja2 import Template | ||
import sys | ||
from feathr.utils._file_utils import write_to_file | ||
import importlib | ||
import os | ||
|
||
class FeathrConfigHelper(object):
    """Collect feature definitions and write them out as HOCON feature config files.

    Definitions are gathered either by importing every Python file under a
    workspace (`save_to_feature_config`) or from objects already held in memory
    (`save_to_feature_config_from_context`). Both paths then render three
    auto-generated config files under a `feature_conf/` sub-directory.
    """

    def __init__(self) -> None:
        """Create a helper; no instance state is kept."""
        pass

    @staticmethod
    def _new_definitions() -> "RepoDefinitions":
        """Return a `RepoDefinitions` whose five collections are all empty sets."""
        return RepoDefinitions(
            sources=set(),
            features=set(),
            transformations=set(),
            feature_anchors=set(),
            derived_features=set(),
        )

    @staticmethod
    def _write_config(rendered_config, config_file_name, local_workspace_dir) -> None:
        """Write `rendered_config` to `config_file_name` joined onto `local_workspace_dir`."""
        config_file_path = os.path.join(local_workspace_dir, config_file_name)
        write_to_file(content=rendered_config, full_file_name=config_file_path)

    def _get_py_files(self, path: Path) -> List[Path]:
        """Get all Python files under path recursively, excluding __init__.py"""
        return [
            candidate
            for candidate in path.glob('**/*.py')
            if candidate.name != "__init__.py"
        ]

    def _convert_to_module_path(self, path: Path, workspace_path: Path) -> str:
        """Convert a Python file path to its module path so that we can import it later.

        Both paths are resolved first. The file's location relative to the
        workspace is then turned into a dotted name, e.g. `<workspace>/path/to/pyfile.py`
        becomes `path.to.pyfile`.

        Args:
            path: the Python file to convert.
            workspace_path: the directory the module path is relative to.

        Returns:
            The dotted module path, without the file suffix.
        """
        resolved_file = path.resolve()
        resolved_root = workspace_path.resolve()
        try:
            # Work on path components rather than on "/" characters so the
            # result is also correct where the OS separator is a backslash.
            relative = resolved_file.relative_to(resolved_root)
        except ValueError:
            # The file is not located under the workspace. Keep the previous
            # best-effort behaviour: drop the leading characters both paths share.
            file_text = str(resolved_file)
            shared = os.path.commonprefix([file_text, str(resolved_root)])
            relative = Path(file_text[len(shared):].lstrip("/\\"))
        return ".".join(relative.with_suffix("").parts)

    def _extract_features_from_context(self, anchor_list, derived_feature_list, result_path: Path) -> "RepoDefinitions":
        """Collect feature definitions from the context instead of python files.

        Args:
            anchor_list: iterable of anchor objects; each must expose `source`
                and `features` instance attributes.
            derived_feature_list: iterable of `DerivedFeature` objects.
            result_path: not used by this method; kept so existing callers
                keep working.

        Returns:
            A `RepoDefinitions` holding the anchors, their sources and features,
            the derived features, and every transformation found on them.

        Raises:
            RuntimeError: if an entry of `derived_feature_list` is not a
                `DerivedFeature`, or a feature of an anchor is not a `Feature`.
        """
        definitions = self._new_definitions()

        for derived_feature in derived_feature_list:
            if not isinstance(derived_feature, DerivedFeature):
                raise RuntimeError(f"Please make sure you pass a list of `DerivedFeature` objects to the `derived_feature_list` argument. {str(type(derived_feature))} is detected.")
            definitions.derived_features.add(derived_feature)
            definitions.transformations.add(vars(derived_feature)["transform"])

        for anchor in anchor_list:
            definitions.feature_anchors.add(anchor)
            # Register the source section of this anchor as well.
            definitions.sources.add(vars(anchor)['source'])
            for feature in vars(anchor)['features']:
                if not isinstance(feature, Feature):
                    raise RuntimeError(f"Please make sure you pass a list of `Feature` objects. {str(type(feature))} is detected.")
                definitions.features.add(feature)
                definitions.transformations.add(vars(feature)["transform"])

        return definitions

    def _extract_features(self, workspace_path: Path) -> "RepoDefinitions":
        """Collect feature definitions by importing the Python files of a workspace.

        Every module-level attribute of every imported file is inspected and
        sorted into the matching collection by type.

        Note: this changes the process working directory to `workspace_path`
        and makes sure `workspace_path` is on `sys.path`.

        Args:
            workspace_path: root directory that is scanned recursively.

        Returns:
            A `RepoDefinitions` with everything found in the workspace modules.
        """
        os.chdir(workspace_path)
        # Add workspace path to system path so that we can load features defined
        # in Python via import_module; skip it when already present so repeated
        # calls do not pile up duplicate entries.
        workspace_entry = str(workspace_path)
        if workspace_entry not in sys.path:
            sys.path.append(workspace_entry)

        definitions = self._new_definitions()
        # Checked in this order; the first matching type wins.
        buckets = (
            (Source, definitions.sources),
            (Feature, definitions.features),
            (DerivedFeature, definitions.derived_features),
            (FeatureAnchor, definitions.feature_anchors),
            (Transformation, definitions.transformations),
        )
        for py_file in self._get_py_files(workspace_path):
            module_path = self._convert_to_module_path(py_file, workspace_path)
            module = importlib.import_module(module_path)
            for attr_name in dir(module):
                obj = getattr(module, attr_name)
                for expected_type, bucket in buckets:
                    if isinstance(obj, expected_type):
                        bucket.add(obj)
                        break
        return definitions

    def save_to_feature_config(self, workspace_path: Path, config_save_dir: Path):
        """Save feature definition within the workspace into HOCON feature config files"""
        repo_definitions = self._extract_features(workspace_path)
        self._save_request_feature_config(repo_definitions, config_save_dir)
        self._save_anchored_feature_config(repo_definitions, config_save_dir)
        self._save_derived_feature_config(repo_definitions, config_save_dir)

    def save_to_feature_config_from_context(self, anchor_list, derived_feature_list, local_workspace_dir: Path):
        """Save feature definition within the workspace into HOCON feature config files from current context, rather than reading from python files"""
        repo_definitions = self._extract_features_from_context(
            anchor_list, derived_feature_list, local_workspace_dir)
        self._save_request_feature_config(repo_definitions, local_workspace_dir)
        self._save_anchored_feature_config(repo_definitions, local_workspace_dir)
        self._save_derived_feature_config(repo_definitions, local_workspace_dir)

    def _save_request_feature_config(self, repo_definitions: "RepoDefinitions", local_workspace_dir="./"):
        """Render the anchors whose source is named "PASSTHROUGH" and write them.

        Output goes to `feature_conf/auto_generated_request_features.conf`
        under `local_workspace_dir`.
        """
        tm = Template(
            """
// THIS FILE IS AUTO GENERATED. PLEASE DO NOT EDIT.
anchors: {
{% for anchor in feature_anchors %}
{% if anchor.source.name == "PASSTHROUGH" %}
{{anchor.to_feature_config()}}
{% endif %}
{% endfor %}
}
"""
        )
        request_feature_configs = tm.render(
            feature_anchors=repo_definitions.feature_anchors)
        self._write_config(
            request_feature_configs,
            "feature_conf/auto_generated_request_features.conf",
            local_workspace_dir,
        )

    @classmethod
    def _save_anchored_feature_config(cls, repo_definitions: "RepoDefinitions", local_workspace_dir="./"):
        """Render every anchor and source not named "PASSTHROUGH" and write them.

        Output goes to `feature_conf/auto_generated_anchored_features.conf`
        under `local_workspace_dir`.
        """
        tm = Template(
            """
// THIS FILE IS AUTO GENERATED. PLEASE DO NOT EDIT.
anchors: {
{% for anchor in feature_anchors %}
{% if not anchor.source.name == "PASSTHROUGH" %}
{{anchor.to_feature_config()}}
{% endif %}
{% endfor %}
}
sources: {
{% for source in sources%}
{% if not source.name == "PASSTHROUGH" %}
{{source.to_feature_config()}}
{% endif %}
{% endfor %}
}
"""
        )
        anchored_feature_configs = tm.render(
            feature_anchors=repo_definitions.feature_anchors,
            sources=repo_definitions.sources)
        cls._write_config(
            anchored_feature_configs,
            "feature_conf/auto_generated_anchored_features.conf",
            local_workspace_dir,
        )

    @classmethod
    def _save_derived_feature_config(cls, repo_definitions: "RepoDefinitions", local_workspace_dir="./"):
        """Render all derived features and write them.

        Output goes to `feature_conf/auto_generated_derived_features.conf`
        under `local_workspace_dir`.
        """
        tm = Template(
            """
anchors: {}
derivations: {
{% for derived_feature in derived_features %}
{{derived_feature.to_feature_config()}}
{% endfor %}
}
"""
        )
        derived_feature_configs = tm.render(
            derived_features=repo_definitions.derived_features)
        cls._write_config(
            derived_feature_configs,
            "feature_conf/auto_generated_derived_features.conf",
            local_workspace_dir,
        )
|
Oops, something went wrong.