diff --git a/requirements.txt b/requirements.txt index 6a86cb78..bd171f7b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,7 @@ pytest==2.7.3 pytest-gitignore==1.3 pytest-mock==0.9.0 pytest-pylint==0.4.0 +# monitor version 3 changes at https://github.com/kennethreitz/requests/blob/proposed/3.0.0/3.0-HISTORY.rst +requests>=2.9.1,<3 +requests-file>=1.4 tox==2.2.1 diff --git a/setup.py b/setup.py index 14cdf1aa..e14cafb4 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ LONG_DESCRIPTION_MD = __doc__ LONG_DESCRIPTION = re.sub(r'(?s)\[(.*?)\]\((http.*?)\)', r'\1', LONG_DESCRIPTION_MD) -INSTALL_REQUIRES = ["setuptools", "idna"] +INSTALL_REQUIRES = ["setuptools", "idna", "requests>=2.9.1,<3", "requests-file>=1.4"] if (2, 7) > sys.version_info: INSTALL_REQUIRES.append("argparse>=1.2.1") diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index 7df45381..8a366f2c 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -37,9 +37,10 @@ import re import socket import warnings -import gzip import idna +import requests +from requests_file import FileAdapter try: import pkg_resources @@ -66,14 +67,10 @@ def resource_stream(cls, _, resource_name): # pylint: disable=import-error,invalid-name,no-name-in-module,redefined-builtin try: # pragma: no cover # Python 2 - from urllib2 import urlopen from urlparse import scheme_chars - from StringIO import StringIO except ImportError: # pragma: no cover # Python 3 - from urllib.request import urlopen from urllib.parse import scheme_chars - from io import StringIO unicode = str # pylint: enable=import-error,invalid-name,no-name-in-module,redefined-builtin @@ -369,15 +366,11 @@ def fetch_file(urls): for url in urls: try: - response = urlopen(url) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO(response.read()) - gzip_response = gzip.GzipFile(fileobj=buf) - text = gzip_response.read() - else: - text = response.read() - except IOError as ioe: - LOG.error('Exception reading 
Public Suffix List url ' + url + ' - ' + str(ioe) + '.') + session = requests.Session() + session.mount('file://', FileAdapter()) + text = session.get(url).text + except requests.exceptions.RequestException as ree: + LOG.error('Exception reading Public Suffix List url ' + url + ' - ' + str(ree) + '.') else: return _decode_utf8(text) @@ -393,7 +386,10 @@ def _decode_utf8(text): The suffix list, wherever its origin, should be UTF-8 encoded. """ - return unicode(text, 'utf-8') + if not isinstance(text, unicode): + return unicode(text, 'utf-8') + else: + return text class _PublicSuffixListTLDExtractor(object):