From 4fb1c2fe324503e43b9156b210fa098324853514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20HUBSCHER?= Date: Thu, 13 Nov 2014 11:47:54 +0100 Subject: [PATCH 1/3] =?UTF-8?q?Bug=20149=20=E2=80=94=20Support=20RFC-1738?= =?UTF-8?q?=20URLs.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- elasticsearch/client/__init__.py | 25 +++++++++++++++++----- test_elasticsearch/test_client/__init__.py | 11 ++++++++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/elasticsearch/client/__init__.py b/elasticsearch/client/__init__.py index dbdbb3798..0421691c5 100644 --- a/elasticsearch/client/__init__.py +++ b/elasticsearch/client/__init__.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import weakref import logging @@ -30,22 +31,36 @@ def _normalize_hosts(hosts): # normalize hosts to dicts for i, host in enumerate(hosts): if isinstance(host, string_types): + h = {} + host = host.strip('/') - # remove schema information - if '://' in host: + + # Detects https schema + if host.startswith('https://'): + h['port'] = 443 + h['use_ssl'] = True + else: logger.warning( "List of nodes should not include schema information (http://): %r.", host ) + + # Remove schema information + if '://' in host: host = host[host.index('://') + 3:] - h = {"host": host} + # Detects auth urls + if '@' in host: + h['http_auth'], host = host.split('@', 1) + + # Detects port in host if ':' in host: - # TODO: detect auth urls host, port = host.rsplit(':', 1) if port.isdigit(): port = int(port) - h = {"host": host, "port": port} + h['port'] = port + + h['host'] = host out.append(h) else: out.append(host) diff --git a/test_elasticsearch/test_client/__init__.py b/test_elasticsearch/test_client/__init__.py index a9f6b9f13..111009227 100644 --- a/test_elasticsearch/test_client/__init__.py +++ b/test_elasticsearch/test_client/__init__.py @@ -13,12 +13,19 @@ def test_none_uses_defaults(self): def test_strings_are_used_as_hostnames(self): self.assertEquals([{"host": "elasticsearch.org"}], _normalize_hosts(["elasticsearch.org"])) - def test_strings_are_parsed_for_port(self): + def test_strings_are_parsed_for_port_and_user(self): self.assertEquals( - [{"host": "elasticsearch.org", "port": 42}, {"host": "user:secret@elasticsearch.com"}], + [{"host": "elasticsearch.org", "port": 42}, {"host": "elasticsearch.com", "http_auth": "user:secret"}], _normalize_hosts(["elasticsearch.org:42", "user:secret@elasticsearch.com"]) ) + def test_strings_are_parsed_for_scheme(self): + self.assertEquals( + [{"host": "elasticsearch.org", "port": 42, "use_ssl": True}, + {"host": "elasticsearch.com", "http_auth": "user:secret", "use_ssl": True, "port": 443}], + _normalize_hosts(["https://elasticsearch.org:42", "https://user:secret@elasticsearch.com"]) + ) + def test_dicts_are_left_unchanged(self): self.assertEquals([{"host": "local", "extra": 123}], _normalize_hosts([{"host": "local", "extra": 123}])) From 82d6046abc747a0170724a788b87dba2194dee69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20HUBSCHER?= Date: Thu, 13 Nov 2014 16:27:36 +0100 Subject: [PATCH 2/3] Refactor to use urlparse. --- elasticsearch/client/__init__.py | 38 ++++++++++++++------------------ elasticsearch/compat.py | 3 ++- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/elasticsearch/client/__init__.py b/elasticsearch/client/__init__.py index 0421691c5..36d36eb63 100644 --- a/elasticsearch/client/__init__.py +++ b/elasticsearch/client/__init__.py @@ -4,7 +4,7 @@ from ..transport import Transport from ..exceptions import NotFoundError, TransportError -from ..compat import string_types +from ..compat import string_types, urlparse from .indices import IndicesClient from .cluster import ClusterClient from .cat import CatClient @@ -31,36 +31,30 @@ def _normalize_hosts(hosts): # normalize hosts to dicts for i, host in enumerate(hosts): if isinstance(host, string_types): - h = {} + added = False - host = host.strip('/') + if '://' not in host: + host = "http://%s" % host + added = True - # Detects https schema - if host.startswith('https://'): - h['port'] = 443 + parsed_url = urlparse(host) + h = {"host": parsed_url.hostname} + + if parsed_url.port: + h["port"] = parsed_url.port + + if parsed_url.scheme == "https": + h['port'] = parsed_url.port or 443 h['use_ssl'] = True - else: + elif parsed_url.scheme == "http" and not added: logger.warning( "List of nodes should not include schema information (http://): %r.", host ) - # Remove schema information - if '://' in host: - host = host[host.index('://') + 3:] - - # Detects auth urls - if '@' in host: - h['http_auth'], host = host.split('@', 1) - - # Detects port in host - if ':' in host: - host, port = host.rsplit(':', 1) - if port.isdigit(): - port = int(port) - h['port'] = port + if parsed_url.username or parsed_url.password: + h['http_auth'] = '%s:%s' % (parsed_url.username, parsed_url.password) - h['host'] = host out.append(h) else: out.append(host) diff --git a/elasticsearch/compat.py b/elasticsearch/compat.py index 5004096e7..deee3c524 100644 --- a/elasticsearch/compat.py +++ b/elasticsearch/compat.py @@ -5,8 +5,9 @@ if PY2: string_types = basestring, from urllib import quote_plus, urlencode + from urlparse import urlparse from itertools import imap as map else: string_types = str, bytes - from urllib.parse import quote_plus, urlencode + from urllib.parse import quote_plus, urlencode, urlparse map = map From 2850245bea11e0f05477e378a490f967258b2101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20HUBSCHER?= Date: Thu, 13 Nov 2014 16:48:59 +0100 Subject: [PATCH 3/3] @HonzaKral review. --- elasticsearch/client/__init__.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/elasticsearch/client/__init__.py b/elasticsearch/client/__init__.py index 36d36eb63..b7508d85c 100644 --- a/elasticsearch/client/__init__.py +++ b/elasticsearch/client/__init__.py @@ -31,11 +31,8 @@ def _normalize_hosts(hosts): # normalize hosts to dicts for i, host in enumerate(hosts): if isinstance(host, string_types): - added = False - if '://' not in host: - host = "http://%s" % host - added = True + host = "//%s" % host parsed_url = urlparse(host) h = {"host": parsed_url.hostname} @@ -46,7 +43,7 @@ def _normalize_hosts(hosts): if parsed_url.scheme == "https": h['port'] = parsed_url.port or 443 h['use_ssl'] = True - elif parsed_url.scheme == "http" and not added: + elif parsed_url.scheme == "http": logger.warning( "List of nodes should not include schema information (http://): %r.", host