Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move schema loading/validating code out of config #9371

Merged
merged 15 commits into from Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
215 changes: 89 additions & 126 deletions lib/galaxy/config/__init__.py
Expand Up @@ -100,11 +100,24 @@


def find_root(kwargs):
    """Return the absolute path of the application root directory.

    The directory is read from the ``root_dir`` entry of ``kwargs``; when that
    entry is absent, the current working directory (``'.'``) is used.
    """
    return os.path.abspath(kwargs.get('root_dir', '.'))


class BaseAppConfiguration(object):
# Override in subclasses (optional): {KEY: config option, VALUE: deprecated directory name}
# If VALUE == first directory in a user-supplied path that resolves to KEY, it will be stripped from that path
deprecated_dirs = None
natefoo marked this conversation as resolved.
Show resolved Hide resolved

def __init__(self, **kwargs):
"""Build the configuration from schema defaults overlaid with ``kwargs``.

The steps run in a fixed order: the schema is loaded before its defaults are
copied into ``_raw_config``, keyword arguments then overwrite those defaults,
attributes are created from the result, and finally path attributes are resolved.
"""
# Keep the options exactly as the caller passed them
self.config_dict = kwargs
# Absolute root directory, derived from kwargs
self.root = find_root(kwargs)
self._set_config_base(kwargs)
self.schema = self._load_schema() # Load schema from schema definition file
self._raw_config = self.schema.defaults.copy() # Save schema defaults as initial config values (raw_config)
self._update_raw_config_from_kwargs(kwargs) # Overwrite raw_config with values passed in kwargs
self._create_attributes_from_raw_config() # Create attributes based on raw_config
self._resolve_paths() # Overwrite attribute values with resolved paths

def _set_config_base(self, config_kwargs):

def _set_global_conf():
Expand Down Expand Up @@ -163,6 +176,75 @@ def _set_config_directories():
_set_global_conf()
_set_config_directories()

def _load_schema(self):
# Override in subclasses
raise Exception('Not implemented')

def _update_raw_config_from_kwargs(self, kwargs):
    """Overwrite entries of ``self._raw_config`` with values supplied in ``kwargs``.

    Only keys present in ``self.schema.app_schema`` are considered. Each value is
    first converted to the option's schema type; a truthy result is then checked
    for a deprecated leading directory when ``self.deprecated_dirs`` is set.
    """
    type_converters = {'bool': string_as_bool, 'int': int, 'float': float, 'str': str}

    def convert_datatype(option, raw):
        datatype = self.schema.app_schema[option].get('type')
        # only `None` is passed through untouched: other falsy values are still converted
        if raw is None or datatype not in type_converters:
            return raw
        return type_converters[datatype](raw)

    def strip_deprecated_dir(option, path):
        resolves_to = self.schema.paths_to_resolve.get(option)
        if not resolves_to:  # not a path that will be resolved
            return path
        leading_dir = path.split(os.sep)[0]  # first directory component
        if leading_dir != self.deprecated_dirs.get(resolves_to):  # not deprecated for this option
            return path
        ignore = leading_dir + os.sep
        log.warning(
            "Paths for the '%s' option are now relative to '%s', remove the leading '%s' "
            "to suppress this warning: %s", option, resolves_to, ignore, path
        )
        return path[len(ignore):]

    for option, raw in kwargs.items():
        if option not in self.schema.app_schema:
            continue
        converted = convert_datatype(option, raw)
        if converted and self.deprecated_dirs:
            converted = strip_deprecated_dir(option, converted)
        self._raw_config[option] = converted

def _create_attributes_from_raw_config(self):
# `base_configs` are a special case: these attributes have been created and will be ignored
# by the code below. Trying to overwrite any other existing attributes will raise an error.
base_configs = {'config_dir', 'data_dir', 'managed_config_dir'}
for key, value in self._raw_config.items():
if not hasattr(self, key):
setattr(self, key, value)
elif key not in base_configs:
raise ConfigurationError("Attempting to override existing attribute '%s'" % key)

def _resolve_paths(self):

def resolve(key):
if key in _cache: # resolve each path only once
return _cache[key]

path = getattr(self, key) # path prior to being resolved
parent = self.schema.paths_to_resolve.get(key)
if not parent: # base case: nothing else needs resolving
return path
parent_path = resolve(parent) # recursively resolve parent path
if path is not None:
path = os.path.join(parent_path, path) # resolve path
else:
path = parent_path # or use parent path

setattr(self, key, path) # update property
_cache[key] = path # cache it!
return path

_cache = {}
for key in self.schema.paths_to_resolve:
resolve(key)

def _in_managed_config_dir(self, path):
return os.path.join(self.managed_config_dir, path)

Expand Down Expand Up @@ -212,133 +294,14 @@ def _parse_config_file_options(self, defaults, listify_defaults, config_kwargs):
class GalaxyAppConfiguration(BaseAppConfiguration):
deprecated_options = ('database_file', 'track_jobs_in_database')
default_config_file_name = 'galaxy.yml'
# {key: config option, value: deprecated directory name}
# If value == first dir in a user path that resolves to key, it will be stripped from the path
deprecated_dirs = {'config_dir': 'config', 'data_dir': 'database'}

def __init__(self, **kwargs):
self._load_schema() # Load schema from schema definition file
self._load_config_from_schema() # Load default propery values from schema
self._validate_schema_paths() # check that paths can be resolved
self._update_raw_config_from_kwargs(kwargs) # Overwrite default values passed as kwargs
self._create_attributes_from_raw_config() # Create attributes for LOADED properties

self.config_dict = kwargs
self.root = find_root(kwargs)
self._set_config_base(kwargs) # must be called prior to _resolve_paths()

self._resolve_paths(kwargs) # Overwrite attributes (not _raw_config) w/resolved paths
self._process_config(kwargs) # Finish processing configuration
super(GalaxyAppConfiguration, self).__init__(**kwargs)
self._process_config(kwargs)

def _load_schema(self):
self.schema = AppSchema(GALAXY_CONFIG_SCHEMA_PATH, GALAXY_APP_NAME)
self.appschema = self.schema.app_schema

def _load_config_from_schema(self):
self._raw_config = {} # keeps track of startup values (kwargs or schema default)
self.reloadable_options = set() # config options we can reload at runtime
self._paths_to_resolve = {} # {config option: referenced config option}
for key, data in self.appschema.items():
self._raw_config[key] = data.get('default')
if data.get('reloadable'):
self.reloadable_options.add(key)
if data.get('path_resolves_to'):
self._paths_to_resolve[key] = data.get('path_resolves_to')

def _validate_schema_paths(self):

def check_exists(option, key):
if not option:
message = "Invalid schema: property '{}' listed as path resolution target " \
"for '{}' does not exist".format(resolves_to, key)
raise_error(message)

def check_type_is_str(option, key):
if option.get('type') != 'str':
message = "Invalid schema: property '{}' should have type 'str'".format(key)
raise_error(message)

def check_is_dag():
visited = set()
for key in self._paths_to_resolve:
visited.clear()
while key:
visited.add(key)
key = self.appschema[key].get('path_resolves_to')
if key and key in visited:
raise_error('Invalid schema: cycle detected')

def raise_error(message):
log.error(message)
raise ConfigurationError(message)

for key, resolves_to in self._paths_to_resolve.items():
parent = self.appschema.get(resolves_to)
check_exists(parent, key)
check_type_is_str(parent, key)
check_type_is_str(self.appschema[key], key)
check_is_dag() # must be called last: walks entire graph

def _update_raw_config_from_kwargs(self, kwargs):

def convert_datatype(key, value):
datatype = self.appschema[key].get('type')
# check for `not None` explicitly (value can be falsy)
if value is not None and datatype in type_converters:
return type_converters[datatype](value)
return value

def strip_deprecated_dir(key, value):
resolves_to = self.appschema[key].get('path_resolves_to')
if resolves_to: # value is a path that will be resolved
first_dir = value.split(os.sep)[0] # get first directory component
if first_dir == self.deprecated_dirs[resolves_to]: # first_dir is deprecated for this option
ignore = first_dir + os.sep
log.warning(
"Paths for the '%s' option are now relative to '%s', remove the leading '%s' "
"to suppress this warning: %s", key, resolves_to, ignore, value
)
return value[len(ignore):]
return value

type_converters = {'bool': string_as_bool, 'int': int, 'float': float, 'str': str}

for key, value in kwargs.items():
if key in self.appschema:
value = convert_datatype(key, value)
if value:
value = strip_deprecated_dir(key, value)
self._raw_config[key] = value

def _create_attributes_from_raw_config(self):
for key, value in self._raw_config.items():
if hasattr(self, key):
raise ConfigurationError("Attempting to override existing attribute '%s'" % key)
setattr(self, key, value)

def _resolve_paths(self, kwargs):

def resolve(key):
if key in _cache: # resolve each path only once
return _cache[key]

path = getattr(self, key) # path prior to being resolved
parent = self.appschema[key].get('path_resolves_to')
if not parent: # base case: nothing else needs resolving
return path
parent_path = resolve(parent) # recursively resolve parent path
if path is not None:
path = os.path.join(parent_path, path) # resolve path
else:
path = parent_path # or use parent path

setattr(self, key, path) # update property
_cache[key] = path # cache it!
return path

_cache = {}
for key in self._paths_to_resolve:
resolve(key)
return AppSchema(GALAXY_CONFIG_SCHEMA_PATH, GALAXY_APP_NAME)

def _process_config(self, kwargs):
# Resolve paths of other config files
Expand Down Expand Up @@ -526,7 +489,7 @@ def _process_config(self, kwargs):
if self.tool_dependency_dir and self.tool_dependency_dir.lower() == 'none':
self.tool_dependency_dir = None
if self.involucro_path is None:
target_dir = self.tool_dependency_dir or self.appschema['tool_dependency_dir'].get('default')
target_dir = self.tool_dependency_dir or self.schema.defaults['tool_dependency_dir']
self.involucro_path = os.path.join(self.data_dir, target_dir, "involucro")
self.involucro_path = os.path.join(self.root, self.involucro_path)
if self.mulled_channels:
Expand Down Expand Up @@ -897,7 +860,7 @@ def parse(string):
def reload_config_options(current_config):
""" Reload modified reloadable config options """
modified_config = read_properties_from_file(current_config.config_file)
for option in current_config.reloadable_options:
for option in current_config.schema.reloadable_options:
if option in modified_config:
# compare to raw value, as that one is set only on load and reload
if current_config._raw_config[option] != modified_config[option]:
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/config/config_manage.py
Expand Up @@ -30,12 +30,12 @@
from galaxy.config import GALAXY_CONFIG_SCHEMA_PATH
from galaxy.config.schema import (
AppSchema,
OPTION_DEFAULTS,
Schema,
)
from galaxy.util import safe_makedirs
from galaxy.util.properties import nice_config_parser
from galaxy.util.yaml_util import (
OPTION_DEFAULTS,
ordered_dump,
ordered_load,
)
Expand Down
78 changes: 74 additions & 4 deletions lib/galaxy/config/schema.py
@@ -1,8 +1,16 @@
from galaxy.util.yaml_util import (
OPTION_DEFAULTS,
ordered_load,
)
import logging

from galaxy.exceptions import ConfigurationError
from galaxy.util.yaml_util import ordered_load

log = logging.getLogger(__name__)

OPTION_DEFAULTS = {
"type": "str",
"unknown_option": False,
"default": None,
"desc": None,
}

UNKNOWN_OPTION = {
"type": "str",
Expand Down Expand Up @@ -33,8 +41,70 @@ def __init__(self, schema_path, app_name):
self.raw_schema = self._read_schema(schema_path)
self.description = self.raw_schema.get("desc", None)
app_schema = self.raw_schema['mapping'][app_name]['mapping']
self._preprocess(app_schema)
super(AppSchema, self).__init__(app_schema)

def _read_schema(self, path):
    """Open the file at ``path`` for reading and return what ``ordered_load`` yields for it."""
    with open(path, "r") as schema_file:
        loaded = ordered_load(schema_file)
    return loaded

def _preprocess(self, app_schema):
"""Populate schema collections used for app configuration."""
self._defaults = {} # {config option: default value or null}
self._reloadable_options = set() # config options we can reload at runtime
self._paths_to_resolve = {} # {config option: referenced config option}
for key, data in app_schema.items():
self._defaults[key] = data.get('default')
if data.get('reloadable'):
self._reloadable_options.add(key)
if data.get('path_resolves_to'):
self._paths_to_resolve[key] = data.get('path_resolves_to')

@property
def defaults(self):
"""Read-only access to ``self._defaults``, the mapping of config option to its schema default."""
return self._defaults

@property
def paths_to_resolve(self):
"""Read-only access to ``self._paths_to_resolve``: config option mapped to the option its path resolves to."""
return self._paths_to_resolve

@property
def reloadable_options(self):
"""Read-only access to ``self._reloadable_options``, the set of options flagged ``reloadable`` in the schema."""
return self._reloadable_options

def validate_path_resolution_graph(self):
    """This method is for tests only: we SHOULD validate the schema's path resolution graph
    as part of automated testing; but we should NOT validate it at runtime.

    For every option in ``self.paths_to_resolve`` this checks that the option it
    resolves to exists in ``self.app_schema`` and that both options have type
    ``'str'``. It then checks that following ``path_resolves_to`` links from any
    option never revisits an option (i.e. there is no cycle).

    Raises:
        ConfigurationError: on the first violation found; the message is also logged.
    """
    def raise_error(message):
        log.error(message)
        raise ConfigurationError(message)

    def check_exists(option, key, resolves_to):
        if not option:
            raise_error(
                "Invalid schema: property '{}' listed as path resolution target "
                "for '{}' does not exist".format(resolves_to, key))

    def check_type_is_str(option, key):
        if option.get('type') != 'str':
            raise_error("Invalid schema: property '{}' should have type 'str'".format(key))

    def check_is_dag():
        visited = set()
        for key in self.paths_to_resolve:
            visited.clear()
            while key:
                visited.add(key)
                key = self.app_schema[key].get('path_resolves_to')
                if key and key in visited:
                    raise_error('Invalid schema: cycle detected')

    for key, resolves_to in self.paths_to_resolve.items():
        parent = self.app_schema.get(resolves_to)
        check_exists(parent, key, resolves_to)
        # Name the parent itself in the error, not the option that points at it
        check_type_is_str(parent, resolves_to)
        check_type_is_str(self.app_schema[key], key)
    check_is_dag()  # must be called last: walks entire graph
7 changes: 4 additions & 3 deletions lib/galaxy/tools/toolbox/base.py
Expand Up @@ -1217,9 +1217,10 @@ def _init_dependency_manager(self):
return
app_config_dict = self.app.config.config_dict
conf_file = app_config_dict.get("dependency_resolvers_config_file")
default_tool_dependency_dir = os.path.join(self.app.config.data_dir, self.app.config.appschema['tool_dependency_dir'].get('default'))
self.dependency_manager = build_dependency_manager(app_config_dict=app_config_dict, conf_file=conf_file,
default_tool_dependency_dir=default_tool_dependency_dir)
default_tool_dependency_dir = os.path.join(
self.app.config.data_dir, self.app.config.schema.defaults['tool_dependency_dir'])
self.dependency_manager = build_dependency_manager(
app_config_dict=app_config_dict, conf_file=conf_file, default_tool_dependency_dir=default_tool_dependency_dir)

def reload_dependency_manager(self):
"""Re-run ``_init_dependency_manager`` on this object."""
self._init_dependency_manager()
7 changes: 0 additions & 7 deletions lib/galaxy/util/yaml_util.py
Expand Up @@ -10,13 +10,6 @@

log = logging.getLogger(__name__)

OPTION_DEFAULTS = {
"type": "str",
"unknown_option": False,
"default": None,
"desc": None,
}


class OrderedLoader(yaml.SafeLoader):
# This class was pulled out of ordered_load() for the sake of
Expand Down