Skip to content
This repository has been archived by the owner on Aug 13, 2018. It is now read-only.

Commit

Permalink
refactor xml parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
quasiben committed Feb 8, 2016
1 parent c32a44e commit 8bf602b
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 25 deletions.
24 changes: 3 additions & 21 deletions knit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
from subprocess import Popen, PIPE

from .utils import conf_find
from .utils import parse_xml
from .env import CondaCreator
from .compatibility import FileNotFoundError, urlparse
from .exceptions import HDFSConfigException
Expand Down Expand Up @@ -95,16 +95,7 @@ def _yarn_conf(self, autodetect=False):
yarn_site = os.path.join(confd, 'yarn-site.xml')
try:
with open(yarn_site, 'r') as f:
url = conf_find(f, 'yarn.resourcemanager.webapp.address')
if url:
u = urlparse(url)

# handle host:port with no :// preamble
if u.path == url:
conf['host'], conf['port'] = url.split(':')
else:
conf['host'] = u.hostname
conf['port'] = u.port
conf = parse_xml(f, 'yarn.resourcemanager.webapp.address')
except FileNotFoundError:
pass
finally:
Expand Down Expand Up @@ -143,16 +134,7 @@ def _hdfs_conf(self, autodetect=False):

try:
with open(core_site, 'r') as f:
url = conf_find(f, 'fs.defaultFS')
if url:
u = urlparse(url)

# handle host:port with no :// preamble
if u.path == url:
conf['host'], conf['port'] = url.split(':')
else:
conf['host'] = u.hostname
conf['port'] = u.port
conf = parse_xml(core_site, 'fs.defaultFS')
except FileNotFoundError:
pass

Expand Down
7 changes: 4 additions & 3 deletions knit/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import os
import logging
from lxml import etree

from knit.utils import conf_find, set_logging
from knit.exceptions import HDFSConfigException
from knit.utils import conf_find, set_logging, parse_xml

cur_dir = os.path.dirname(__file__)
core_site = os.path.join(cur_dir, 'files', 'core-site.xml')
yarn_site = os.path.join(cur_dir, 'files', 'yarn-site.xml')

def check_docker():
"""check if inside docker container"""
Expand All @@ -18,6 +17,8 @@ def check_docker():
def test_conf_parse():
    """Check raw lookups via conf_find and the host/port dict built by parse_xml."""
    # A property present in core-site.xml is returned verbatim.
    default_fs = conf_find(core_site, 'fs.defaultFS')
    assert default_fs == 'hdfs://knit-host:9000'

    # An unknown property name yields an empty string.
    missing = conf_find(core_site, 'FOO/BAR')
    assert missing == ''

    # The yarn webapp address is split into separate host and port entries.
    expected = {'host': 'knit-host', 'port': '8088'}
    assert parse_xml(yarn_site, 'yarn.resourcemanager.webapp.address') == expected


def test_set_logging():
Expand Down
17 changes: 16 additions & 1 deletion knit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from lxml import etree

from .compatibility import FileNotFoundError, urlparse
from .compatibility import urlparse

format = ('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(format=format, level=logging.INFO)
Expand All @@ -17,6 +17,21 @@ def set_logging(level):
logger.setLevel(level)


def parse_xml(f, search_string=''):
    """
    Look up an address property in a hadoop configuration file and split
    it into host and port.

    Parameters
    ----------
    f:
        Passed unchanged to ``conf_find`` as the configuration source.
    search_string: str
        Name of the property to look up, e.g. ``fs.defaultFS``.

    Returns
    -------
    dict
        Empty when ``conf_find`` returns nothing for ``search_string``.
        Otherwise it holds ``host`` and ``port``. For a bare ``host:port``
        value the port is kept as the string found in the file; for a
        ``scheme://host:port`` value host and port are whatever ``urlparse``
        reports. ``port`` is ``None`` when the value carries no port.
    """
    conf = {}
    url = conf_find(f, search_string)
    if not url:
        return conf

    if '://' not in url:
        # handle host:port with no :// preamble
        # Do not rely on urlparse here: newer Pythons read the text before
        # the colon in "host:8088" as a URL scheme, which would leave both
        # hostname and port empty.
        host, sep, port = url.rpartition(':')
        if sep:
            conf['host'], conf['port'] = host, port
        else:
            # Bare host name with no port at all.
            conf['host'], conf['port'] = url, None
    else:
        u = urlparse(url)
        conf['host'] = u.hostname
        conf['port'] = u.port
    return conf


def conf_find(fp='', name=''):
"""
Utility function to help parse hadoop configuration files.
Expand Down

0 comments on commit 8bf602b

Please sign in to comment.