diff --git a/datapackage/config.py b/datapackage/config.py index 951a8bb..875a0d7 100644 --- a/datapackage/config.py +++ b/datapackage/config.py @@ -7,6 +7,7 @@ # Module API +REMOTE_SCHEMES = ['http', 'https', 'ftp', 'ftps', 's3'] TABULAR_FORMATS = ['csv', 'tsv', 'xls', 'xlsx'] DEFAULT_DATA_PACKAGE_PROFILE = 'data-package' DEFAULT_RESOURCE_PROFILE = 'data-resource' diff --git a/datapackage/resource.py b/datapackage/resource.py index 70a1696..7b7ff57 100644 --- a/datapackage/resource.py +++ b/datapackage/resource.py @@ -12,7 +12,7 @@ import requests from copy import deepcopy from tableschema import Table, Storage -from six.moves.urllib.parse import urljoin +from six.moves.urllib.parse import urljoin, urlparse from six.moves.urllib.request import urlopen from .profile import Profile from . import exceptions @@ -489,11 +489,10 @@ def _inspect_source(data, path, base_path, storage): elif len(path) == 1: # Remote - # TODO: rebase on the approach `tableschema` uses with `config.REMOTE_SCHEMES - if path[0].startswith('http') or path[0].startswith('ftp') or path[0].startswith('s3'): + if urlparse(path[0]).scheme in config.REMOTE_SCHEMES: inspection['source'] = path[0] inspection['remote'] = True - elif base_path and base_path.startswith('http'): + elif base_path and urlparse(base_path).scheme in config.REMOTE_SCHEMES: norm_base_path = base_path if base_path.endswith('/') else base_path + '/' inspection['source'] = urljoin(norm_base_path, path[0]) inspection['remote'] = True