Skip to content

Commit

Permalink
Adding GZip support to urllib3 (#704)
Browse files Browse the repository at this point in the history
* Adding GZip support to urllib3

* Adding compression documentation and example

* Convert to lowercase for consistency

* Moving header manipulation to __init__()

* Validating headers for compression

* Moving body compression out of the headers block

* Don't compress if there is no body

* Infer true
  • Loading branch information
robgil authored and fxdgear committed Mar 13, 2018
1 parent c354429 commit 64c125d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
11 changes: 11 additions & 0 deletions docs/index.rst
Expand Up @@ -258,6 +258,17 @@ bodies via post::
from elasticsearch import Elasticsearch
es = Elasticsearch(send_get_body_as='POST')

Compression
~~~~~~~~~~~
When using capacity-constrained networks (low throughput), it may be handy to enable
compression. This is especially useful when doing bulk loads or inserting large
documents. This will configure compression on the *request*.
::

from elasticsearch import Elasticsearch
    es = Elasticsearch(hosts, http_compress=True)


Running on AWS with IAM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
11 changes: 10 additions & 1 deletion elasticsearch/connection/http_urllib3.py
Expand Up @@ -3,6 +3,7 @@
import urllib3
from urllib3.exceptions import ReadTimeoutError, SSLError as UrllibSSLError
import warnings
import gzip

# sentinel value for `verify_certs`.
# This is used to detect if a user is passing in a value for `verify_certs`
Expand Down Expand Up @@ -62,13 +63,15 @@ class Urllib3HttpConnection(Connection):
host. See https://urllib3.readthedocs.io/en/1.4/pools.html#api for more
information.
:arg headers: any custom http headers to be add to requests
:arg http_compress: Use gzip compression
"""
def __init__(self, host='localhost', port=9200, http_auth=None,
use_ssl=False, verify_certs=VERIFY_CERTS_DEFAULT, ca_certs=None, client_cert=None,
client_key=None, ssl_version=None, ssl_assert_hostname=None,
ssl_assert_fingerprint=None, maxsize=10, headers=None, ssl_context=None, **kwargs):
ssl_assert_fingerprint=None, maxsize=10, headers=None, ssl_context=None, http_compress=False, **kwargs):

super(Urllib3HttpConnection, self).__init__(host=host, port=port, use_ssl=use_ssl, **kwargs)
self.http_compress = http_compress
self.headers = urllib3.make_headers(keep_alive=True)
if http_auth is not None:
if isinstance(http_auth, (tuple, list)):
Expand All @@ -80,6 +83,10 @@ def __init__(self, host='localhost', port=9200, http_auth=None,
for k in headers:
self.headers[k.lower()] = headers[k]

if self.http_compress == True:
self.headers.update(urllib3.make_headers(accept_encoding=True))
self.headers.update({'content-encoding': 'gzip'})

self.headers.setdefault('content-type', 'application/json')
pool_class = urllib3.HTTPConnectionPool
kw = {}
Expand Down Expand Up @@ -154,6 +161,8 @@ def perform_request(self, method, url, params=None, body=None, timeout=None, ign
if headers:
request_headers = request_headers.copy()
request_headers.update(headers)
if self.http_compress and body:
body = gzip.compress(body)
response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
duration = time.time() - start
raw_data = response.data.decode('utf-8')
Expand Down
5 changes: 5 additions & 0 deletions test_elasticsearch/test_connection.py
Expand Up @@ -32,6 +32,11 @@ def test_ssl_context(self):
)
self.assertTrue(con.use_ssl)

def test_http_compression(self):
    """Passing http_compress=True must set the flag on the connection and
    install the gzip ``content-encoding`` request header."""
    con = Urllib3HttpConnection(http_compress=True)
    self.assertTrue(con.http_compress)
    # assertEquals is a deprecated alias; assertEqual is the canonical name.
    self.assertEqual(con.headers['content-encoding'], 'gzip')

def test_timeout_set(self):
    """The ``timeout`` keyword must be stored verbatim on the connection."""
    con = Urllib3HttpConnection(timeout=42)
    # assertEquals is a deprecated alias; assertEqual is the canonical name.
    self.assertEqual(42, con.timeout)
Expand Down

0 comments on commit 64c125d

Please sign in to comment.