Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The protocol scheme is now added when finding the active RM. #67

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions tests/test_hadoop_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import mock
from mock import patch
from requests import RequestException
from tests import TestCase

import requests_mock
Expand Down Expand Up @@ -103,7 +104,7 @@ def test_get_resource_endpoint(self):

endpoint = hadoop_conf.get_resource_manager_endpoint()

self.assertEqual('example.com:8022', endpoint)
self.assertEqual('http://example.com:8022', endpoint)
parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these tests not failing without these changes?

Copy link
Collaborator

@dimon222 dimon222 Dec 29, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the test changed because the result value will now include the scheme (changed in the same PR; scroll down a bit):
https://github.com/toidi/hadoop-yarn-api-python-client/blob/f88ac6966eff575b18563ef8829967546cf3cc3f/yarn_api_client/hadoop_conf.py#L43

'yarn.resourcemanager.webapp.address')

Expand All @@ -122,7 +123,7 @@ def test_get_resource_endpoint_with_ha(self, check_is_active_rm_mock, parse_mock
check_is_active_rm_mock.return_value = True
endpoint = hadoop_conf.get_resource_manager_endpoint()

self.assertEqual('example.com:8022', endpoint)
self.assertEqual('http://example.com:8022', endpoint)
parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
'yarn.resourcemanager.webapp.address.rm1')

Expand Down Expand Up @@ -171,23 +172,21 @@ def test_check_is_active_rm(self, is_https_only_mock):

# Emulate requests library exception (socket timeout, etc)
with requests_mock.mock() as requests_get_mock:
requests_get_mock.side_effect = Exception('error')
# requests_get_mock.get('https://example2:8022/cluster', status_code=200)
requests_get_mock.return_value = None
self.assertFalse(hadoop_conf.check_is_active_rm('https://example2:8022'))
requests_get_mock.get('example2:8022/cluster', exc=RequestException)
self.assertFalse(hadoop_conf.check_is_active_rm('example2:8022'))

def test_get_resource_manager(self):
with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
parse_mock.return_value = 'example.com:8022'

endpoint = hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, None)

self.assertEqual('example.com:8022', endpoint)
self.assertEqual('http://example.com:8022', endpoint)
parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml', 'yarn.resourcemanager.webapp.address')

endpoint = hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, 'rm1')

self.assertEqual(('example.com:8022'), endpoint)
self.assertEqual(('http://example.com:8022'), endpoint)
parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml', 'yarn.resourcemanager.webapp.address.rm1')

parse_mock.reset_mock()
Expand Down
10 changes: 6 additions & 4 deletions yarn_api_client/hadoop_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def _is_https_only():

def _get_resource_manager(hadoop_conf_path, rm_id=None):
# compose property name based on policy (and rm_id)
if _is_https_only():
is_https_only = _is_https_only()

if is_https_only:
prop_name = 'yarn.resourcemanager.webapp.https.address'
else:
prop_name = 'yarn.resourcemanager.webapp.address'
Expand All @@ -43,15 +45,15 @@ def _get_resource_manager(hadoop_conf_path, rm_id=None):
if rm_id:
prop_name = "{name}.{rm_id}".format(name=prop_name, rm_id=rm_id)

rm_webapp_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), prop_name)
rm_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), prop_name)

return rm_webapp_address or None
return ('https://' if is_https_only else 'http://') + rm_address if rm_address else None


def check_is_active_rm(url, timeout=30, auth=None, verify=True):
try:
response = requests.get(url + "/cluster", timeout=timeout, auth=auth, verify=verify)
except Exception as e:
except requests.RequestException as e:
log.warning("Exception encountered accessing RM '{url}': '{err}', continuing...".format(url=url, err=e))
return False

Expand Down