From d421f1fefb2adffad87e0ca801e7e562bfae36e7 Mon Sep 17 00:00:00 2001 From: jnozsc Date: Fri, 5 Feb 2016 17:15:58 -0800 Subject: [PATCH 1/3] use requests instead of urllib --- requirements.txt | 2 ++ tldextract/tldextract.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6a86cb78..0dff95b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,6 @@ pytest==2.7.3 pytest-gitignore==1.3 pytest-mock==0.9.0 pytest-pylint==0.4.0 +requests>=2.9.1 +requests-file>=1.4 tox==2.2.1 diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index 7df45381..8a366f2c 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -37,9 +37,10 @@ import re import socket import warnings -import gzip import idna +import requests +from requests_file import FileAdapter try: import pkg_resources @@ -66,14 +67,10 @@ def resource_stream(cls, _, resource_name): # pylint: disable=import-error,invalid-name,no-name-in-module,redefined-builtin try: # pragma: no cover # Python 2 - from urllib2 import urlopen from urlparse import scheme_chars - from StringIO import StringIO except ImportError: # pragma: no cover # Python 3 - from urllib.request import urlopen from urllib.parse import scheme_chars - from io import StringIO unicode = str # pylint: enable=import-error,invalid-name,no-name-in-module,redefined-builtin @@ -369,15 +366,11 @@ def fetch_file(urls): for url in urls: try: - response = urlopen(url) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO(response.read()) - gzip_response = gzip.GzipFile(fileobj=buf) - text = gzip_response.read() - else: - text = response.read() - except IOError as ioe: - LOG.error('Exception reading Public Suffix List url ' + url + ' - ' + str(ioe) + '.') + session = requests.Session() + session.mount('file://', FileAdapter()) + text = session.get(url).text + except requests.exceptions.RequestException as ree: + 
LOG.error('Exception reading Public Suffix List url ' + url + ' - ' + str(ree) + '.') else: return _decode_utf8(text) @@ -393,7 +386,10 @@ def _decode_utf8(text): The suffix list, wherever its origin, should be UTF-8 encoded. """ - return unicode(text, 'utf-8') + if not isinstance(text, unicode): + return unicode(text, 'utf-8') + else: + return text class _PublicSuffixListTLDExtractor(object): From 454be3ea241bbb1e6b5d45c4ac01fe5728f51b34 Mon Sep 17 00:00:00 2001 From: jnozsc Date: Sat, 6 Feb 2016 09:28:39 -0800 Subject: [PATCH 2/3] fix setup.py --- requirements.txt | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0dff95b2..d72eb659 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ pytest==2.7.3 pytest-gitignore==1.3 pytest-mock==0.9.0 pytest-pylint==0.4.0 -requests>=2.9.1 +# monitor version 3 change at https://github.com/kennethreitz/requests/blob/proposed/3.0.0/3.0-HISTORY.rst +requests>=2.9.2,<3 requests-file>=1.4 tox==2.2.1 diff --git a/setup.py b/setup.py index 14cdf1aa..e14cafb4 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ LONG_DESCRIPTION_MD = __doc__ LONG_DESCRIPTION = re.sub(r'(?s)\[(.*?)\]\((http.*?)\)', r'\1', LONG_DESCRIPTION_MD) -INSTALL_REQUIRES = ["setuptools", "idna"] +INSTALL_REQUIRES = ["setuptools", "idna", "requests>=2.9.1,<3", "requests-file>=1.4"] if (2, 7) > sys.version_info: INSTALL_REQUIRES.append("argparse>=1.2.1") From 58fe449ca6e5c4a0f4d2d36458839d713517b0fc Mon Sep 17 00:00:00 2001 From: jnozsc Date: Sat, 6 Feb 2016 09:31:01 -0800 Subject: [PATCH 3/3] typo --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d72eb659..bd171f7b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,6 @@ pytest-gitignore==1.3 pytest-mock==0.9.0 pytest-pylint==0.4.0 # monitor version 3 change at
https://github.com/kennethreitz/requests/blob/proposed/3.0.0/3.0-HISTORY.rst -requests>=2.9.2,<3 +requests>=2.9.1,<3 requests-file>=1.4 tox==2.2.1