Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
System Config (#99)
Browse files Browse the repository at this point in the history
* Add coverage level gate
* Add config parser
* Add config processing utility
* Refactor Encoder --> CategoricalEncoder
* Refactor SmartText --> TextEncoder
* Condense config files into setup.cfg until pyproject.toml is supported
* Remove .travis.yml
* Remove wily from deps
* Remove coveralls and patch pre-commit installation steps
* Address pytest deprecation warning
* Fix config parser docstring and newsfragment
* Patch failing tests and add reset config cache functionality
  • Loading branch information
adithyabsk committed Jul 25, 2019
1 parent 2304070 commit 702cec2
Show file tree
Hide file tree
Showing 33 changed files with 555 additions and 387 deletions.
13 changes: 0 additions & 13 deletions .coveragerc

This file was deleted.

15 changes: 0 additions & 15 deletions .flake8

This file was deleted.

10 changes: 1 addition & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v1.9.1
rev: v1.9.2
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
Expand All @@ -17,11 +17,3 @@ repos:
rev: v2.2.3
hooks:
- id: flake8
- repo: local
hooks:
- id: wily
name: wily
entry: printf %s "y\ny\n" | wily diff
verbose: true
language: python
additional_dependencies: [wily]
15 changes: 0 additions & 15 deletions .readthedocs.yml

This file was deleted.

36 changes: 0 additions & 36 deletions .travis.yml

This file was deleted.

6 changes: 3 additions & 3 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ stages:

# Build
# Linux
# TODO: Turn off Linux Testing until https://github.com/sdispater/poetry/issues/1197 is resolved
# # scs install is broken on Linux
# - job: test_linux
# dependsOn: lint
# pool:
Expand Down Expand Up @@ -76,8 +76,8 @@ stages:
# brew install gcc@5
# displayName: Install Brew packages
# - template: ci/install.yml
# - script: $HOME/.poetry/bin/poetry build
# $HOME/.poetry/bin/poetry publish -u $(pypi_username) -p $(pypi_password)
# - script: poetry build
# poetry publish -u $(pypi_username) -p $(pypi_password)
# displayName: Release to pypi
# - task: GitHubRelease@0
# inputs:
Expand Down
24 changes: 15 additions & 9 deletions ci/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,23 @@ steps:
displayName: 'Use Python $(python.version)'
inputs:
versionSpec: '$(python.version)'
name: python3
- script: |
pip install --upgrade pip
curl -sSL https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py | python
displayName: Install Poetry
- script: $HOME/.poetry/bin/poetry install -vvv -E dev
condition: eq( variables['Agent.OS'], 'Linux' )
displayName: Install all packages (Linux)
# TODO: Fix manual patch for pyrfr after https://github.com/automl/auto-sklearn/issues/688 is closed
# Unix
- bash: |
echo "##vso[task.prependpath]$HOME/.poetry/bin"
condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' ))
displayName: Add Poetry to path (Unix)
- script: |
$HOME/.poetry/bin/poetry install -vvv
export CC=gcc-5; export CXX=g++-5; $HOME/.poetry/bin/poetry install -vvv -E dev
export CC=gcc-5; export CXX=g++-5; pip install --upgrade pyrfr
condition: eq( variables['Agent.OS'], 'Darwin' )
displayName: Install all packages (MacOS)
poetry --version
pip freeze --all
pip --version
poetry config settings.virtualenvs.create false
poetry install -vvv
poetry install -vvv
displayName: Install all packages
# TODO: Fix manual patch for pyrfr after https://github.com/automl/auto-sklearn/issues/688 is closed
# TODO: Temporary patch to make CI pass: run `poetry install` twice to mask a setuptools error until https://github.com/sdispater/poetry/issues/1197 is resolved
2 changes: 1 addition & 1 deletion ci/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ steps:
versionSpec: '3.6'
- script: |
pip install pre-commit
pre-commit install-hooks
pre-commit install -f --install-hooks
pre-commit run --all-files
displayName: Running pre-commit on all files
17 changes: 9 additions & 8 deletions ci/tests.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
steps:
- script: $HOME/.poetry/bin/poetry run pytest --junitxml=junit/test-results.xml --cov-report=xml
- script: poetry run pytest --junitxml=junit/test-results.xml --cov-report=xml
env:
FORESHADOW_TESTS: ALL
displayName: Run pytest
# TODO: Switch back to 'Linux' when https://github.com/sdispater/poetry/issues/1197 is resolved
- script: $HOME/.poetry/bin/poetry run coveralls
condition: and(succeeded(), eq( variables['Agent.OS'], 'Darwin' ))
env:
COVERALLS_REPO_TOKEN: $(coveralls_token)
displayName: Submit coveralls report
- task: PublishTestResults@2
inputs:
testResultsFiles: 'junit/test-results.xml'
Expand All @@ -18,4 +12,11 @@ steps:
inputs:
codeCoverageTool: Cobertura
summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'
reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'
- task: BuildQualityChecks@6
inputs:
checkCoverage: true
coverageFailOption: 'fixed'
coverageType: 'branches'
coverageThreshold: '90'
displayName: 'Check build quality'
109 changes: 109 additions & 0 deletions foreshadow/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Foreshadow system config resolver."""

import os

import yaml

from foreshadow.utils import get_config_path, get_transformer


# Name of the YAML file that `get_config` looks for inside a base folder.
CONFIG_FILE_NAME = "config.yml"

# Framework-level defaults. `resolve_config` overlays the user and then the
# local configuration on top of these, one top-level step at a time. Each
# step is either a list of transformer names or a mapping of intent name to
# a list of transformer names; the names are turned into transformers by
# `get_transformer` during resolution.
DEFAULT_CONFIG = {
"cleaner": [],
"engineerer": {},
"preprocessor": {
"numerical": ["Imputer", "Scaler"],
"categorical": ["CategoricalEncoder"],
"text": ["TextEncoder"],
},
"reducer": {},
}

# Cache of resolved configurations, keyed by the absolute path of the working
# directory at resolution time. Emptied by `reset_config`.
_cfg = {}


def get_config(base):
    """Read the `config.yml` file found directly inside a folder.

    Args:
        base (str): Folder that is expected to contain a file called
            `config.yml`.

    Returns:
        dict: The content produced by the YAML loader. An empty dictionary
            is returned when the path is not an existing regular file or
            when the loader yields None.

    """
    config_path = os.path.join(base, CONFIG_FILE_NAME)

    # isfile() is already False for a path that does not exist, so a single
    # check covers both the "missing" and the "not a regular file" cases.
    if not os.path.isfile(config_path):
        return {}

    with open(config_path) as handle:
        parsed = yaml.safe_load(handle)

    return {} if parsed is None else parsed


def reset_config():
    """Discard every cached configuration resolution.

    Note:
        Handy in an interactive session: after a configuration file has been
        edited, calling this makes the next `resolve_config` call resolve the
        configuration again without restarting the interpreter.

    """
    global _cfg
    # Rebind to a brand-new mapping rather than emptying the old one in place.
    _cfg = dict()


def resolve_config():
    """Resolve the configuration to actual classes.

    Note:
        The order of resolution is as follows, in increasing precedence
        order: framework, user, local. Overrides are applied per top-level
        step: a step defined at a higher-precedence level replaces the
        lower-precedence value for that step entirely.

        The result is cached per working directory, and a cached result is
        returned without reading any configuration file. Call
        `reset_config` to force the files to be read again.

    Returns:
        dict: A dictionary with the keys of
            `foreshadow.config.DEFAULT_CONFIG` (plus any extra steps found
            in the user or local files) with the correct overrides. List
            steps become lists of resolved transformers; dict steps become
            mappings of intent name to a list of resolved transformers.

    """
    local_path = os.path.abspath("")

    # Consult the cache before touching the disk, so repeated calls from the
    # same working directory do not re-read and re-parse the config files.
    if local_path in _cfg:
        return _cfg[local_path]

    user = get_config(get_config_path())
    local = get_config(local_path)

    # Expand the dictionaries in order of precedence
    merged = {**DEFAULT_CONFIG, **user, **local}

    resolved = {}
    for step, data in merged.items():
        if not data:
            # Nothing to resolve. Truthiness (rather than len()) also lets a
            # step left blank in the YAML file, which arrives as None, pass
            # through instead of raising a TypeError.
            resolved[step] = data
        elif isinstance(data, list):
            resolved[step] = [get_transformer(name) for name in data]
        elif isinstance(data, dict):
            resolved[step] = {
                intent: [get_transformer(name) for name in names]
                for intent, names in data.items()
            }
        # NOTE: a non-empty value that is neither a list nor a dict is left
        # out of the result, as before.

    _cfg[local_path] = resolved

    return resolved
2 changes: 1 addition & 1 deletion foreshadow/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from foreshadow.estimators.auto import determine_problem_type


def generate_model(args):
def generate_model(args): # noqa: C901
"""Process command line args and generate a Foreshadow model to fit.
Args:
Expand Down
9 changes: 5 additions & 4 deletions foreshadow/core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,14 @@ def _make_deserializable(data):
return data


def _pickle_cache_path(self, cache_path=None):
def _pickle_cache_path(cls_name, cache_path=None):
"""Get the pickle cache path of a transformer.
Uses a generated UUID and the class name to come up with a unique
filename.
Args:
cls_name (str): Name of the class to be pickled
cache_path (str, optional): override the default cache_path which
is in the root of the user's directory.
Expand All @@ -105,7 +106,7 @@ def _pickle_cache_path(self, cache_path=None):
if cache_path is None:
cache_path = get_cache_path()

fname = self.__class__.__name__ + uuid.uuid4().hex
fname = cls_name + uuid.uuid4().hex
fpath = "{}.pkl".format(fname)
path = os.path.join(cache_path, fpath)

Expand Down Expand Up @@ -325,7 +326,7 @@ def disk_serialize(self, cache_path=None):
str: The path the data was saved to.
"""
fpath = _pickle_cache_path(cache_path)
fpath = _pickle_cache_path(self.__class__.__name__, cache_path)
with open(fpath, "wb+") as fopen:
pickle.dump(self, fopen, protocol=pickle.HIGHEST_PROTOCOL)

Expand Down Expand Up @@ -434,7 +435,7 @@ def dict_serialize(self, deep=False):
Note:
This recursively serializes the individual steps to facilitate a
human readabel form.
human readable form.
Args:
deep (bool): If True, will return the parameters for this estimator
Expand Down
8 changes: 4 additions & 4 deletions foreshadow/intents/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from foreshadow.intents.base import BaseIntent, PipelineTemplateEntry
from foreshadow.transformers.concrete import DropFeature
from foreshadow.transformers.smart import (
Encoder,
CategoricalEncoder,
MultiImputer,
Scaler,
SimpleImputer,
SmartText,
TextEncoder,
)


Expand Down Expand Up @@ -198,7 +198,7 @@ class CategoricalIntent(GenericIntent):

single_pipeline_template = [
PipelineTemplateEntry("dropper", DropFeature, False),
PipelineTemplateEntry("impute_encode", Encoder, True),
PipelineTemplateEntry("impute_encode", CategoricalEncoder, True),
]
"""Encode the column automatically"""

Expand Down Expand Up @@ -248,7 +248,7 @@ class TextIntent(GenericIntent):
"""No children"""

single_pipeline_template = [
PipelineTemplateEntry("text", SmartText, False)
PipelineTemplateEntry("text", TextEncoder, False)
]
"""Encodes the column automatically"""

Expand Down

0 comments on commit 702cec2

Please sign in to comment.