diff --git a/knit/core.py b/knit/core.py index a64f1b1..5736202 100644 --- a/knit/core.py +++ b/knit/core.py @@ -6,7 +6,7 @@ import logging from subprocess import Popen, PIPE -from .utils import conf_find +from .utils import parse_xml from .env import CondaCreator from .compatibility import FileNotFoundError, urlparse from .exceptions import HDFSConfigException @@ -95,16 +95,7 @@ def _yarn_conf(self, autodetect=False): yarn_site = os.path.join(confd, 'yarn-site.xml') try: with open(yarn_site, 'r') as f: - url = conf_find(f, 'yarn.resourcemanager.webapp.address') - if url: - u = urlparse(url) - - # handle host:port with no :// preabmle - if u.path == url: - conf['host'], conf['port'] = url.split(':') - else: - conf['host'] = u.hostname - conf['port'] = u.port + conf = parse_xml(f, 'yarn.resourcemanager.webapp.address') except FileNotFoundError: pass finally: @@ -143,16 +134,7 @@ def _hdfs_conf(self, autodetect=False): try: with open(core_site, 'r') as f: - url = conf_find(f, 'fs.defaultFS') - if url: - u = urlparse(url) - - # handle host:port with no :// preabmle - if u.path == url: - conf['host'], conf['port'] = url.split(':') - else: - conf['host'] = u.hostname - conf['port'] = u.port + conf = parse_xml(core_site, 'fs.defaultFS') except FileNotFoundError: pass diff --git a/knit/tests/test_utils.py b/knit/tests/test_utils.py index 17e7cc4..297b165 100644 --- a/knit/tests/test_utils.py +++ b/knit/tests/test_utils.py @@ -1,12 +1,11 @@ import os import logging -from lxml import etree -from knit.utils import conf_find, set_logging -from knit.exceptions import HDFSConfigException +from knit.utils import conf_find, set_logging, parse_xml cur_dir = os.path.dirname(__file__) core_site = os.path.join(cur_dir, 'files', 'core-site.xml') +yarn_site = os.path.join(cur_dir, 'files', 'yarn-site.xml') def check_docker(): """check if inside docker container""" @@ -18,6 +17,8 @@ def check_docker(): def test_conf_parse(): assert 'hdfs://knit-host:9000' == conf_find(core_site, 
'fs.defaultFS') assert '' == conf_find(core_site, 'FOO/BAR') + conf = parse_xml(yarn_site, 'yarn.resourcemanager.webapp.address') + assert conf == {'port': '8088', 'host': 'knit-host'} def test_set_logging(): diff --git a/knit/utils.py b/knit/utils.py index 84ff5cb..bbc3913 100644 --- a/knit/utils.py +++ b/knit/utils.py @@ -6,7 +6,7 @@ from lxml import etree -from .compatibility import FileNotFoundError, urlparse +from .compatibility import urlparse format = ('%(asctime)s - %(name)s - %(levelname)s - %(message)s') logging.basicConfig(format=format, level=logging.INFO) @@ -17,6 +17,21 @@ def set_logging(level): logger.setLevel(level) +def parse_xml(f, search_string=''): + conf = {} + url = conf_find(f, search_string) + if url: + u = urlparse(url) + + # handle host:port with no :// preamble + if u.path == url: + conf['host'], conf['port'] = url.split(':') + else: + conf['host'] = u.hostname + conf['port'] = u.port + return conf + + def conf_find(fp='', name=''): """ Utility function to help parse hadoop configuration files.