Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
fixed requote_uri for python2
Browse files Browse the repository at this point in the history
  • Loading branch information
roll committed Sep 12, 2016
1 parent f3dc0e6 commit 16c63c4
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 4 deletions.
1 change: 1 addition & 0 deletions pylama.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[pylama]
linters = pyflakes,mccabe,pep8
ignore = E731

[pylama:mccabe]
complexity = 16
Expand Down
23 changes: 22 additions & 1 deletion tabulator/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import os
import re
import six
import requests.utils
from bs4 import BeautifulSoup
from six.moves.urllib.parse import urlparse
from six.moves.urllib.parse import urlparse, urlunparse
from chardet.universaldetector import UniversalDetector
from . import exceptions

Expand Down Expand Up @@ -118,3 +119,23 @@ def ensure_dir(path):
dirpath = os.path.dirname(path)
if dirpath and not os.path.exists(dirpath):
os.makedirs(dirpath)


def requote_uri(uri):
    """Requote a uri so non-ascii chars, spaces etc. are percent-escaped.

    On Python 2 the uri is first split into its components. The network
    location (index 1 of the parsed result) is encoded with the ``idna``
    codec; every other component is encoded to UTF-8 and each byte in the
    range 0x80-0xFF is replaced by its upper-cased ``%XX`` escape. The
    rebuilt uri is then handed to ``requests.utils.requote_uri``. On other
    Python versions the uri is handed over unchanged.

    Args:
        uri (str): uri to requote

    Returns:
        The value returned by ``requests.utils.requote_uri``.
    """
    if not six.PY2:
        return requests.utils.requote_uri(uri)

    def percent_escape(match):
        # Turn one matched high byte into its upper-cased '%XX' form.
        return ('%%%02x' % ord(match.group(0))).upper()

    def escape_high_bytes(raw):
        return re.sub('[\x80-\xFF]', percent_escape, raw)

    netloc_position = 1
    encoded_parts = []
    for position, component in enumerate(urlparse(uri)):
        if position == netloc_position:
            # Host names use IDNA rather than percent-encoding.
            encoded_parts.append(component.encode('idna'))
        else:
            encoded_parts.append(escape_high_bytes(component.encode('utf-8')))
    return requests.utils.requote_uri(urlunparse(encoded_parts))
5 changes: 2 additions & 3 deletions tabulator/loaders/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import io
import six
from requests.utils import requote_uri
from six.moves.urllib.request import urlopen
from .. import exceptions
from .. import helpers
Expand All @@ -26,8 +25,8 @@ def __init__(self, **options):

def load(self, source, encoding, mode):

# Requote uri if it contains spaces etc
source = requote_uri(source)
# Requote uri
source = helpers.requote_uri(source)

# Prepare bytes
if six.PY2:
Expand Down
8 changes: 8 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,11 @@ def test_reset_stream_seekable():
def test_reset_stream_not_seekable():
    """reset_stream must raise when given the plain string 'not_seekable'."""
    not_a_stream = 'not_seekable'
    with pytest.raises(Exception):
        helpers.reset_stream(not_a_stream)


def test_requote_uri():
    """Check requote_uri on an already-quoted url and on a non-ascii url."""
    already_quoted = 'http://next.openspending.org/fdp-adapter/convert?url=https%3A%2F%2Fraw.githubusercontent.com%2Fkravets-levko%2Fdata%2Fmaster%2Ftest.xlsx.csv'
    with_pound_sign = 'http://data.defra.gov.uk/ops/government_procurement_card/over_£500_GPC_apr_2013.csv'
    pound_sign_quoted = 'http://data.defra.gov.uk/ops/government_procurement_card/over_%C2%A3500_GPC_apr_2013.csv'
    # (input, expected) pairs: a quoted url must come back untouched, and
    # the pound sign must come back as its UTF-8 percent-escape.
    cases = [
        (already_quoted, already_quoted),
        (with_pound_sign, pound_sign_quoted),
    ]
    for source, expected in cases:
        assert helpers.requote_uri(source) == expected

0 comments on commit 16c63c4

Please sign in to comment.