From e32c32f6b55b56b9f885836c5695bcb38395abdc Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 19 Jul 2019 16:25:18 +0200 Subject: [PATCH 1/7] [#4914] Remove unused test file --- ckan/tests/legacy/misc/test_sync.py | 79 ----------------------------- 1 file changed, 79 deletions(-) delete mode 100644 ckan/tests/legacy/misc/test_sync.py diff --git a/ckan/tests/legacy/misc/test_sync.py b/ckan/tests/legacy/misc/test_sync.py deleted file mode 100644 index 2e4c8429d87..00000000000 --- a/ckan/tests/legacy/misc/test_sync.py +++ /dev/null @@ -1,79 +0,0 @@ -# encoding: utf-8 - -import subprocess -import urllib2 -import time - -from ckan.common import config - -import ckan.model as model -from ckan.tests.legacy import * -from ckan.common import json - -instance_dir = config['here'] - -class Options: - pid_file = 'paster.pid' - -# TODO: Reenable this when sync functionality is in place -class _TestSync(TestController): - @classmethod - def setup_class(self): - # setup Server A (sub process) - subprocess.call('paster db clean --config=test_sync.ini', shell=True) - subprocess.call('paster db init --config=test_sync.ini', shell=True) - subprocess.call('paster create-test-data --config=test_sync.ini', shell=True) - self.sub_proc = subprocess.Popen(['paster', 'serve', 'test_sync.ini']) - # setup Server B (this process) - # (clean) - - self._last_synced_revision_id = {'http://localhost:5050':None} - - @classmethod - def teardown_class(self): - self.sub_proc.kill() - model.repo.rebuild_db() - - def sub_app_get(self, offset): - count = 0 - while True: - try: - f = urllib2.urlopen('http://localhost:5050%s' % offset) - except urllib2.URLError as e: - if hasattr(e, 'reason') and type(e.reason) == urllib2.socket.error: - # i.e. process not started up yet - count += 1 - time.sleep(1) - assert count < 5, '%s: %r; %r' % (offset, e, e.args) - else: - print('Error opening url: %s' % offset) - assert 0, e # Print exception - else: - break - return f.read() - - def sub_app_get_deserialized(offset): - res = sub_app_get(offset) - if res == None: - return None - else: - return json.loads(res) - - def test_1_first_sync(self): - server = self._last_synced_revision_id.keys()[0] - assert server == 'http://localhost:5050' - - # find id of last revision synced - last_sync_rev_id = self._last_synced_revision_id[server] - assert last_sync_rev_id == None # no syncs yet - - # get revision ids since then - remote_rev_ids = self.sub_app_get_deserialized('%s/api/search/revision?since=%s' % (server, last_sync_rev_id)) - assert len(remote_rev_ids) == 3 - remote_latest_rev_id = remote_rev_ids[-1] - - # get revision diffs - diffs = self.sub_app_get_deserialized('%s/api/diff/revision?diff=%s&oldid=%s' % (server, remote_latest_rev_id, last_sync_rev_id)) - assert len(diffs) == 3 - - # apply diffs From 56b9cdea509d453dccac68cc12bf3cea079c4cad Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 19 Jul 2019 16:53:52 +0200 Subject: [PATCH 2/7] [#4914] Replace usage of urllib2.open with requests.get --- ckan/lib/captcha.py | 18 +++++++++--------- ckan/lib/search/__init__.py | 21 +++++++++++++-------- ckan/model/license.py | 19 +++++++++---------- ckan/tests/legacy/__init__.py | 12 ------------ 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/ckan/lib/captcha.py b/ckan/lib/captcha.py index 9a138dff1d7..7a34d6ba212 100644 --- a/ckan/lib/captcha.py +++ b/ckan/lib/captcha.py @@ -2,9 +2,8 @@ from ckan.common import config -import urllib -import urllib2 -import json +import requests + def check_recaptcha(request): '''Check a user\'s 
recaptcha submission is valid, and raise CaptchaError @@ -23,12 +22,13 @@ def check_recaptcha(request): # recaptcha_response_field will be unicode if there are foreign chars in # the user input. So we need to encode it as utf8 before urlencoding or # we get an exception (#1431). - params = urllib.urlencode(dict(secret=recaptcha_private_key, - remoteip=client_ip_address, - response=recaptcha_response_field.encode('utf8'))) - f = urllib2.urlopen(recaptcha_server_name, params) - data = json.load(f) - f.close() + params = dict( + secret=recaptcha_private_key, + remoteip=client_ip_address, + response=recaptcha_response_field.encode('utf8') + ) + response = requests.get(recaptcha_server_name, params) + data = response.json() try: if not data['success']: diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 817dde72a89..8289ddf1012 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -5,7 +5,8 @@ import cgitb import warnings import xml.dom.minidom -import urllib2 + +import requests from paste.deploy.converters import asbool @@ -257,11 +258,13 @@ def _get_schema_from_solr(file_offset): url = solr_url.strip('/') + file_offset - req = urllib2.Request(url=url) if http_auth: - req.add_header('Authorization', http_auth) + response = requests.get( + url, headers={'Authorization': http_auth}) + else: + response = requests.get(url) - return urllib2.urlopen(req) + return response def check_solr_schema_version(schema_file=None): ''' @@ -299,14 +302,16 @@ def check_solr_schema_version(schema_file=None): try: # Try Managed Schema res = _get_schema_from_solr(SOLR_SCHEMA_FILE_OFFSET_MANAGED) - except urllib2.HTTPError: + res.raise_for_status() + except requests.HTTPError: # Fallback to Manually Edited schema.xml res = _get_schema_from_solr(SOLR_SCHEMA_FILE_OFFSET_CLASSIC) + schema_content = res.text else: - url = 'file://%s' % schema_file - res = urllib2.urlopen(url) + with open(schema_file, 'rb') as f: + schema_content = f.read() - tree = xml.dom.minidom.parseString(res.read()) + tree = xml.dom.minidom.parseString(schema_content) version = tree.documentElement.getAttribute('version') if not len(version): diff --git a/ckan/model/license.py b/ckan/model/license.py index 8965de0e3f6..08b7e2fd208 100644 --- a/ckan/model/license.py +++ b/ckan/model/license.py @@ -1,9 +1,10 @@ # encoding: utf-8 import datetime -import urllib2 import re +import requests + from ckan.common import config from paste.deploy.converters import asbool from six import text_type, string_types @@ -116,16 +117,14 @@ def __init__(self): def load_licenses(self, license_url): try: - response = urllib2.urlopen(license_url) - response_body = response.read() - except Exception as inst: - msg = "Couldn't connect to licenses service %r: %s" % (license_url, inst) + response = requests.get(license_url) + license_data = response.json() + except requests.RequestException as e: + msg = "Couldn't get the licenses file {}: {}".format(license_url, e) + raise Exception(msg) + except ValueError as e: + msg = "Couldn't parse the licenses file {}: {}".format(license_url, e) raise Exception(msg) - try: - license_data = json.loads(response_body) - except Exception as inst: - msg = "Couldn't read response from licenses service %r: %s" % (response_body, inst) - raise Exception(inst) for license in license_data: if isinstance(license, string_types): license = license_data[license] diff --git a/ckan/tests/legacy/__init__.py b/ckan/tests/legacy/__init__.py index 1f489d10a48..4e8340db074 100644 --- 
a/ckan/tests/legacy/__init__.py +++ b/ckan/tests/legacy/__init__.py @@ -259,18 +259,6 @@ def _start_ckan_server(config_file=None): process = subprocess.Popen(['paster', 'serve', config_path]) return process - @staticmethod - def _wait_for_url(url='http://127.0.0.1:5000/', timeout=15): - for i in range(int(timeout)*100): - import urllib2 - import time - try: - response = urllib2.urlopen(url) - except urllib2.URLError: - time.sleep(0.01) - else: - break - @staticmethod def _stop_ckan_server(process): pid = process.pid From 7ad7aaa370377197a5a4d45ef48ac7b99121210a Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 30 Aug 2019 15:26:24 +0200 Subject: [PATCH 3/7] [#4914] [#4914] [#4914] Replace direct usage of urllib and urllib2 with six wrappers --- ckan/config/middleware/common_middleware.py | 6 ++--- ckan/controllers/api.py | 4 ++-- ckan/controllers/group.py | 2 +- ckan/controllers/package.py | 2 +- ckan/lib/helpers.py | 8 +++---- ckan/lib/search/common.py | 7 +++--- ckan/tests/legacy/functional/api/base.py | 6 ++--- .../functional/api/test_package_search.py | 2 +- ckan/tests/legacy/test_coding_standards.py | 1 - ckan/views/dataset.py | 2 +- ckan/views/group.py | 2 +- ckanext/datastore/backend/postgres.py | 23 ++++++++++--------- ckanext/datastore/tests/test_search.py | 7 +++--- ckanext/datatablesview/controller.py | 2 +- 14 files changed, 38 insertions(+), 36 deletions(-) diff --git a/ckan/config/middleware/common_middleware.py b/ckan/config/middleware/common_middleware.py index f87ccf7a2c8..969ea39ff13 100644 --- a/ckan/config/middleware/common_middleware.py +++ b/ckan/config/middleware/common_middleware.py @@ -1,12 +1,12 @@ # encoding: utf-8 """Common middleware used by both Flask and Pylons app stacks.""" - -import urllib2 import hashlib import json import cgi +from six.moves.urllib.parse import unquote + import sqlalchemy as sa from webob.request import FakeCGIBody @@ -73,7 +73,7 @@ def __call__(self, environ, start_response): data = {} for part in parts: k, v = part.split('=') - data[k] = urllib2.unquote(v).decode("utf8") + data[k] = unquote(v).decode("utf8") start_response('200 OK', [('Content-Type', 'text/html')]) # we want a unique anonomized key for each user so that we do # not count multiple clicks from the same user. 
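[Editor's note, not part of the patch] The pattern this commit applies across all of the touched files is mechanical: each Python 2-only urllib, urllib2 or urlparse import is swapped for the equivalent name in six.moves.urllib.parse, which resolves to urllib/urlparse on Python 2 and to urllib.parse on Python 3, so the call sites stay the same and only the import line changes. A minimal sketch of that substitution follows; the URL and query values are illustrative, not taken from the patch.

    # Sketch of the six.moves substitution used throughout this commit.
    # Only the import location changes; the helper functions behave the same
    # on Python 2 and Python 3.
    from six.moves.urllib.parse import (
        urlencode, quote_plus, unquote, urlparse, urlunparse
    )

    # A list of tuples keeps the parameter order deterministic on both Pythons.
    query = urlencode([('q', 'climate data'), ('rows', 20)])  # 'q=climate+data&rows=20'
    parts = urlparse('https://demo.ckan.org/dataset?' + query)

    print(parts.netloc)             # 'demo.ckan.org'
    print(quote_plus('user name'))  # 'user+name'
    print(unquote('caf%C3%A9'))     # u'café' on Python 3, UTF-8 bytes on Python 2
    print(urlunparse(parts))        # round-trips the parsed URL unchanged

Because the import is the only moving part, the same change lands in matching pairs of Pylons controllers and Flask views (e.g. controllers/package.py and views/dataset.py in the diffstat above).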
diff --git a/ckan/controllers/api.py b/ckan/controllers/api.py index e5b4bddcdb9..22be5c687d9 100644 --- a/ckan/controllers/api.py +++ b/ckan/controllers/api.py @@ -3,9 +3,9 @@ import os.path import logging import cgi -import urllib from six import text_type +from six.moves.urllib.parse import unquote_plus import ckan.model as model import ckan.logic as logic @@ -383,7 +383,7 @@ def make_unicode(entity): if keys and request.POST[keys[0]] in [u'1', u'']: request_data = keys[0] else: - request_data = urllib.unquote_plus(request.body) + request_data = unquote_plus(request.body) except Exception as inst: msg = "Could not find the POST data: %r : %s" % \ (request.POST, inst) diff --git a/ckan/controllers/group.py b/ckan/controllers/group.py index 46c5c2b1813..dc2141b9fbd 100644 --- a/ckan/controllers/group.py +++ b/ckan/controllers/group.py @@ -2,7 +2,7 @@ import logging import datetime -from urllib import urlencode +from six.moves.urllib.parse import urlencode from pylons.i18n import get_lang from six import string_types, text_type diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index ec9d12b0499..dcc583c43e7 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -1,7 +1,7 @@ # encoding: utf-8 import logging -from urllib import urlencode +from six.moves.urllib.parse import urlencode import datetime import mimetypes import cgi diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index 577d62232ac..d726202f5d3 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -12,11 +12,9 @@ import os import pytz import tzlocal -import urllib import pprint import copy import urlparse -from urllib import urlencode import uuid from paste.deploy import converters @@ -35,7 +33,9 @@ from flask import url_for as _flask_default_url_for from werkzeug.routing import BuildError as FlaskRouteBuildError import i18n + from six import string_types, text_type +from six.moves.urllib.parse import urlencode, quote, unquote import ckan.exceptions import ckan.model as model @@ -573,7 +573,7 @@ def full_current_url(): @core_helper def current_url(): ''' Returns current url unquoted''' - return urllib.unquote(request.environ['CKAN_CURRENT_URL']) + return unquote(request.environ['CKAN_CURRENT_URL']) @core_helper @@ -1332,7 +1332,7 @@ def gravatar(email_hash, size=100, default=None): if default not in _VALID_GRAVATAR_DEFAULTS: # treat the default as a url - default = urllib.quote(default, safe='') + default = quote(default, safe='') return literal('''Gravatar''' diff --git a/ckan/lib/search/common.py b/ckan/lib/search/common.py index b0ff96a8a27..f951fcbdca0 100644 --- a/ckan/lib/search/common.py +++ b/ckan/lib/search/common.py @@ -5,8 +5,9 @@ import re import pysolr import simplejson + from six import string_types -import urllib +from six.moves.urllib.parse import quote_plus log = logging.getLogger(__name__) @@ -72,8 +73,8 @@ def make_connection(decode_dates=True): protocol = re.search('http(?:s)?://', solr_url).group() solr_url = re.sub(protocol, '', solr_url) solr_url = "{}{}:{}@{}".format(protocol, - urllib.quote_plus(solr_user), - urllib.quote_plus(solr_password), + quote_plus(solr_user), + quote_plus(solr_password), solr_url) if decode_dates: diff --git a/ckan/tests/legacy/functional/api/base.py b/ckan/tests/legacy/functional/api/base.py index 3afa66fff23..7232e209e5f 100644 --- a/ckan/tests/legacy/functional/api/base.py +++ b/ckan/tests/legacy/functional/api/base.py @@ -5,12 +5,12 @@ except ImportError: from StringIO import StringIO -import urllib - from nose.tools 
import assert_equal from paste.fixture import TestRequest from webhelpers.html import url_escape +from six.moves.urllib.parse import quote + import ckan.model as model from ckan.tests.legacy import CreateTestData from ckan.tests.legacy import TestController as ControllerTestCase @@ -83,7 +83,7 @@ def offset(self, path): if self.api_version: base += '/%s' % self.api_version utf8_encoded = (u'%s%s' % (base, path)).encode('utf8') - url_encoded = urllib.quote(utf8_encoded) + url_encoded = quote(utf8_encoded) return url_encoded def assert_msg_represents_anna(self, msg): diff --git a/ckan/tests/legacy/functional/api/test_package_search.py b/ckan/tests/legacy/functional/api/test_package_search.py index 984c08aa29b..31792d3cc71 100644 --- a/ckan/tests/legacy/functional/api/test_package_search.py +++ b/ckan/tests/legacy/functional/api/test_package_search.py @@ -1,6 +1,6 @@ # encoding: utf-8 -from urllib import quote +from six.moves.urllib.parse import quote import webhelpers diff --git a/ckan/tests/legacy/test_coding_standards.py b/ckan/tests/legacy/test_coding_standards.py index be0137bc657..29f37bedc0d 100644 --- a/ckan/tests/legacy/test_coding_standards.py +++ b/ckan/tests/legacy/test_coding_standards.py @@ -312,7 +312,6 @@ class TestPep8(object): 'ckan/include/rcssmin.py', 'ckan/include/rjsmin.py', 'ckan/lib/app_globals.py', - 'ckan/lib/captcha.py', 'ckan/lib/cli.py', 'ckan/lib/create_test_data.py', 'ckan/lib/dictization/__init__.py', diff --git a/ckan/views/dataset.py b/ckan/views/dataset.py index 40e8dad4992..ed107fa8172 100644 --- a/ckan/views/dataset.py +++ b/ckan/views/dataset.py @@ -2,7 +2,7 @@ import logging from collections import OrderedDict from functools import partial -from urllib import urlencode +from six.moves.urllib.parse import urlencode import datetime from flask import Blueprint, make_response diff --git a/ckan/views/group.py b/ckan/views/group.py index 71f304d3ca9..868a1a251a5 100644 --- a/ckan/views/group.py +++ b/ckan/views/group.py @@ -3,7 +3,7 @@ import datetime import logging import re -from urllib import urlencode +from six.moves.urllib.parse import urlencode from pylons.i18n import get_lang from six import string_types, text_type diff --git a/ckanext/datastore/backend/postgres.py b/ckanext/datastore/backend/postgres.py index a58badac855..9fe1c96dde8 100644 --- a/ckanext/datastore/backend/postgres.py +++ b/ckanext/datastore/backend/postgres.py @@ -7,14 +7,15 @@ import os import pprint import sqlalchemy.engine.url as sa_url -import urllib -import urllib2 import urlparse import datetime import hashlib import json from cStringIO import StringIO +from six.moves.urllib.parse import ( + urlencode, unquote, urlunparse, parse_qsl +) from six import string_types, text_type import ckan.lib.cli as cli @@ -676,10 +677,10 @@ def _insert_links(data_dict, limit, offset): return # no links required for local actions # change the offset in the url - parsed = list(urlparse.urlparse(urlstring)) - query = urllib2.unquote(parsed[4]) + parsed = list(urlparse(urlstring)) + query = unquote(parsed[4]) - arguments = dict(urlparse.parse_qsl(query)) + arguments = dict(parse_qsl(query)) arguments_start = dict(arguments) arguments_prev = dict(arguments) arguments_next = dict(arguments) @@ -691,15 +692,15 @@ def _insert_links(data_dict, limit, offset): parsed_start = parsed[:] parsed_prev = parsed[:] parsed_next = parsed[:] - parsed_start[4] = urllib.urlencode(arguments_start) - parsed_next[4] = urllib.urlencode(arguments_next) - parsed_prev[4] = urllib.urlencode(arguments_prev) + parsed_start[4] = 
urlencode(arguments_start) + parsed_next[4] = urlencode(arguments_next) + parsed_prev[4] = urlencode(arguments_prev) # add the links to the data dict - data_dict['_links']['start'] = urlparse.urlunparse(parsed_start) - data_dict['_links']['next'] = urlparse.urlunparse(parsed_next) + data_dict['_links']['start'] = urlunparse(parsed_start) + data_dict['_links']['next'] = urlunparse(parsed_next) if int(offset) - int(limit) > 0: - data_dict['_links']['prev'] = urlparse.urlunparse(parsed_prev) + data_dict['_links']['prev'] = urlunparse(parsed_prev) def _where(where_clauses_and_values): diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py index 4c79035735a..bf0af321f2b 100644 --- a/ckanext/datastore/tests/test_search.py +++ b/ckanext/datastore/tests/test_search.py @@ -2,11 +2,12 @@ import json import nose -import urllib import pprint import sqlalchemy.orm as orm +from six.moves.urllib.parse import urlencode + import ckan.plugins as p import ckan.lib.create_test_data as ctd import ckan.model as model @@ -1148,7 +1149,7 @@ def test_self_join(self): where a.author = b.author limit 2 '''.format(self.data['resource_id']) - data = urllib.urlencode({'sql': query}) + data = urlencode({'sql': query}) auth = {'Authorization': str(self.normal_user.apikey)} res = self.app.post('/api/action/datastore_search_sql', params=data, extra_environ=auth) @@ -1211,7 +1212,7 @@ def test_not_authorized_to_access_system_tables(self): ] for query in test_cases: data = {'sql': query.replace('\n', '')} - postparams = urllib.urlencode(data) + postparams = urlencode(data) res = self.app.post('/api/action/datastore_search_sql', params=postparams, status=403) diff --git a/ckanext/datatablesview/controller.py b/ckanext/datatablesview/controller.py index f45984fa499..6298fb70385 100644 --- a/ckanext/datatablesview/controller.py +++ b/ckanext/datatablesview/controller.py @@ -1,6 +1,6 @@ # encoding: utf-8 -from urllib import urlencode +from six.moves.urllib.parse import urlencode from six import text_type From 79455dd076041cbad7eb6e8ddf3f3790927288bb Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 30 Aug 2019 15:45:09 +0200 Subject: [PATCH 4/7] [#4914] [#4914] Replace direct usage of urlparse with six wrappers --- ckan/config/environment.py | 3 ++- ckan/config/middleware/__init__.py | 11 +++++------ ckan/controllers/feed.py | 5 +++-- ckan/lib/datapreview.py | 7 ++++--- ckan/lib/dictization/model_dictize.py | 5 +++-- ckan/lib/helpers.py | 23 ++++++++++++----------- ckan/logic/validators.py | 6 +++--- ckan/views/feed.py | 4 ++-- ckanext/datapusher/logic/action.py | 2 +- ckanext/datastore/backend/postgres.py | 3 +-- ckanext/resourceproxy/controller.py | 4 ++-- ckanext/resourceproxy/plugin.py | 5 +++-- ckanext/textview/tests/test_view.py | 5 +++-- 13 files changed, 44 insertions(+), 39 deletions(-) diff --git a/ckan/config/environment.py b/ckan/config/environment.py index 16c5842afcf..f96c3e7c0bd 100644 --- a/ckan/config/environment.py +++ b/ckan/config/environment.py @@ -4,13 +4,14 @@ import os import logging import warnings -from urlparse import urlparse import pytz import sqlalchemy from pylons import config as pylons_config import formencode +from six.moves.urllib.parse import urlparse + import ckan.config.routing as routing import ckan.model as model import ckan.plugins as p diff --git a/ckan/config/middleware/__init__.py b/ckan/config/middleware/__init__.py index 73af42fa2f4..433a9bc4505 100644 --- a/ckan/config/middleware/__init__.py +++ b/ckan/config/middleware/__init__.py @@ -1,13 
+1,12 @@ # encoding: utf-8 """WSGI app initialization""" -import urllib -import urlparse -import urllib import webob from routes import request_config as routes_request_config +from six.moves.urllib.parse import urlparse, quote + from ckan.lib.i18n import get_locales_from_config from ckan.config.environment import load_environment from ckan.config.middleware.flask_app import make_flask_stack @@ -147,13 +146,13 @@ def handle_i18n(self, environ): path_info = environ['PATH_INFO'] # sort out weird encodings path_info = \ - '/'.join(urllib.quote(pce, '') for pce in path_info.split('/')) + '/'.join(quote(pce, '') for pce in path_info.split('/')) qs = environ.get('QUERY_STRING') if qs: # sort out weird encodings - qs = urllib.quote(qs, '') + qs = quote(qs, '') environ['CKAN_CURRENT_URL'] = '%s?%s' % (path_info, qs) else: environ['CKAN_CURRENT_URL'] = path_info @@ -193,7 +192,7 @@ def __call__(self, environ, start_response): if app_name == 'flask_app': # This request will be served by Flask, but we still need the # Pylons URL builder (Routes) to work - parts = urlparse.urlparse(config.get('ckan.site_url', + parts = urlparse(config.get('ckan.site_url', 'http://0.0.0.0:5000')) request_config = routes_request_config() request_config.host = str(parts.netloc + parts.path) diff --git a/ckan/controllers/feed.py b/ckan/controllers/feed.py index 81f71b8e17e..793bb750914 100644 --- a/ckan/controllers/feed.py +++ b/ckan/controllers/feed.py @@ -22,9 +22,10 @@ """ # TODO fix imports import logging -import urlparse from six import text_type +from six.moves.urllib.parse import urlparse + import webhelpers.feedgenerator import ckan.lib.base as base @@ -131,7 +132,7 @@ def _create_atom_id(resource_path, authority_name=None, date_string=None): authority_name = config.get('ckan.feeds.authority_name', '').strip() if not authority_name: site_url = config.get('ckan.site_url', '').strip() - authority_name = urlparse.urlparse(site_url).netloc + authority_name = urlparse(site_url).netloc if not authority_name: log.warning('No authority_name available for feed generation. ' diff --git a/ckan/lib/datapreview.py b/ckan/lib/datapreview.py index e6e336098b0..95f0494ee1d 100644 --- a/ckan/lib/datapreview.py +++ b/ckan/lib/datapreview.py @@ -5,9 +5,10 @@ Functions and data structures that are needed for the ckan data preview. """ -import urlparse import logging +from six.moves.urllib.parse import urlparse + from ckan.common import config import ckan.plugins as p @@ -36,9 +37,9 @@ def compare_domains(urls): # all urls are interpreted as absolute urls, # except for urls that start with a / try: - if not urlparse.urlparse(url).scheme and not url.startswith('/'): + if not urlparse(url).scheme and not url.startswith('/'): url = '//' + url - parsed = urlparse.urlparse(url.lower(), 'http') + parsed = urlparse(url.lower(), 'http') domain = (parsed.scheme, parsed.hostname, parsed.port) except ValueError: # URL is so messed up that even urlparse can't stand it diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 14753410f65..2bef1211666 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -12,7 +12,8 @@ which builds the dictionary by iterating over the table columns. 
''' import datetime -import urlparse + +from six.moves.urllib.parse import urlsplit from ckan.common import config from sqlalchemy.sql import select @@ -115,7 +116,7 @@ def resource_dictize(res, context): resource_id=res.id, filename=cleaned_name, qualified=True) - elif resource['url'] and not urlparse.urlsplit(url).scheme and not context.get('for_edit'): + elif resource['url'] and not urlsplit(url).scheme and not context.get('for_edit'): resource['url'] = u'http://' + url.lstrip('/') return resource diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index d726202f5d3..37c1d1d42ed 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -14,7 +14,6 @@ import tzlocal import pprint import copy -import urlparse import uuid from paste.deploy import converters @@ -35,7 +34,9 @@ import i18n from six import string_types, text_type -from six.moves.urllib.parse import urlencode, quote, unquote +from six.moves.urllib.parse import ( + urlencode, quote, unquote, urlparse, urlunparse +) import ckan.exceptions import ckan.model as model @@ -232,7 +233,7 @@ def get_site_protocol_and_host(): ''' site_url = config.get('ckan.site_url', None) if site_url is not None: - parsed_url = urlparse.urlparse(site_url) + parsed_url = urlparse(site_url) return ( parsed_url.scheme.encode('utf-8'), parsed_url.netloc.encode('utf-8') @@ -394,9 +395,9 @@ def _url_for_flask(*args, **kw): # Flask to pass the host explicitly, so we rebuild the URL manually # based on `ckan.site_url`, which is essentially what we did on Pylons protocol, host = get_site_protocol_and_host() - parts = urlparse.urlparse(my_url) - my_url = urlparse.urlunparse((protocol, host, parts.path, parts.params, - parts.query, parts.fragment)) + parts = urlparse(my_url) + my_url = urlunparse((protocol, host, parts.path, parts.params, + parts.query, parts.fragment)) return my_url @@ -434,7 +435,7 @@ def url_for_static(*args, **kw): This is a wrapper for :py:func:`routes.url_for` ''' if args: - url = urlparse.urlparse(args[0]) + url = urlparse(args[0]) url_is_external = (url.scheme != '' or url.netloc != '') if url_is_external: CkanUrlException = ckan.exceptions.CkanUrlException @@ -450,7 +451,7 @@ def url_for_static_or_external(*args, **kw): This is a wrapper for :py:func:`routes.url_for` ''' def fix_arg(arg): - url = urlparse.urlparse(str(arg)) + url = urlparse(str(arg)) url_is_relative = (url.scheme == '' and url.netloc == '' and not url.path.startswith('/')) if url_is_relative: @@ -473,7 +474,7 @@ def is_url(*args, **kw): if not args: return False try: - url = urlparse.urlparse(args[0]) + url = urlparse(args[0]) except ValueError: return False @@ -555,9 +556,9 @@ def url_is_local(url): '''Returns True if url is local''' if not url or url.startswith('//'): return False - parsed = urlparse.urlparse(url) + parsed = urlparse(url) if parsed.scheme: - domain = urlparse.urlparse(url_for('/', qualified=True)).netloc + domain = urlparse(url_for('/', qualified=True)).netloc if domain != parsed.netloc: return False return True diff --git a/ckan/logic/validators.py b/ckan/logic/validators.py index 8e8d4069f03..04d71fb092c 100644 --- a/ckan/logic/validators.py +++ b/ckan/logic/validators.py @@ -5,8 +5,10 @@ from itertools import count import re import mimetypes +import string from six import string_types +from six.moves.urllib.parse import urlparse import ckan.lib.navl.dictization_functions as df import ckan.logic as logic @@ -687,15 +689,13 @@ def tag_not_in_vocabulary(key, tag_dict, errors, context): def url_validator(key, data, errors, context): ''' Checks 
that the provided value (if it is present) is a valid URL ''' - import urlparse - import string url = data.get(key, None) if not url: return try: - pieces = urlparse.urlparse(url) + pieces = urlparse(url) if all([pieces.scheme, pieces.netloc]) and \ set(pieces.netloc) <= set(string.letters + string.digits + '-.') and \ pieces.scheme in ['http', 'https']: diff --git a/ckan/views/feed.py b/ckan/views/feed.py index cf54b6f0105..d6265e99df9 100644 --- a/ckan/views/feed.py +++ b/ckan/views/feed.py @@ -1,8 +1,8 @@ # encoding: utf-8 import logging -import urlparse +from six.moves.urllib.parse import urlparse from flask import Blueprint, make_response from six import text_type import webhelpers.feedgenerator @@ -477,7 +477,7 @@ def _create_atom_id(resource_path, authority_name=None, date_string=None): authority_name = config.get(u'ckan.feeds.authority_name', u'').strip() if not authority_name: site_url = config.get(u'ckan.site_url', u'').strip() - authority_name = urlparse.urlparse(site_url).netloc + authority_name = urlparse(site_url).netloc if not authority_name: log.warning(u'No authority_name available for feed generation. ' diff --git a/ckanext/datapusher/logic/action.py b/ckanext/datapusher/logic/action.py index 5a5f5196767..81545e67d4a 100644 --- a/ckanext/datapusher/logic/action.py +++ b/ckanext/datapusher/logic/action.py @@ -2,10 +2,10 @@ import logging import json -import urlparse import datetime import time +from six.moves.urllib.parse import urljoin from dateutil.parser import parse as parse_date import requests diff --git a/ckanext/datastore/backend/postgres.py b/ckanext/datastore/backend/postgres.py index 9fe1c96dde8..44d78030097 100644 --- a/ckanext/datastore/backend/postgres.py +++ b/ckanext/datastore/backend/postgres.py @@ -7,14 +7,13 @@ import os import pprint import sqlalchemy.engine.url as sa_url -import urlparse import datetime import hashlib import json from cStringIO import StringIO from six.moves.urllib.parse import ( - urlencode, unquote, urlunparse, parse_qsl + urlencode, unquote, urlunparse, parse_qsl, urlparse ) from six import string_types, text_type diff --git a/ckanext/resourceproxy/controller.py b/ckanext/resourceproxy/controller.py index 40371d9c90f..158044c3005 100644 --- a/ckanext/resourceproxy/controller.py +++ b/ckanext/resourceproxy/controller.py @@ -1,8 +1,8 @@ # encoding: utf-8 from logging import getLogger -import urlparse +from six.moves.urllib.parse import urlsplit import requests from ckan.common import config @@ -33,7 +33,7 @@ def proxy_resource(context, data_dict): base.abort(404, _('Resource not found')) url = resource['url'] - parts = urlparse.urlsplit(url) + parts = urlsplit(url) if not parts.scheme or not parts.netloc: base.abort(409, detail='Invalid URL.') diff --git a/ckanext/resourceproxy/plugin.py b/ckanext/resourceproxy/plugin.py index edfd34aa8a8..175bb80a7ed 100644 --- a/ckanext/resourceproxy/plugin.py +++ b/ckanext/resourceproxy/plugin.py @@ -2,10 +2,11 @@ from logging import getLogger +from six.moves.urllib.parse import urlparse + import ckan.lib.helpers as h import ckan.plugins as p import ckan.lib.datapreview as datapreview -import urlparse from ckan.common import config log = getLogger(__name__) @@ -21,7 +22,7 @@ def get_proxified_resource_url(data_dict, proxy_schemes=['http','https']): ckan_url = config.get('ckan.site_url', '//localhost:5000') url = data_dict['resource']['url'] - scheme = urlparse.urlparse(url).scheme + scheme = urlparse(url).scheme compare_domains = datapreview.compare_domains if not compare_domains([ckan_url, url]) 
and scheme in proxy_schemes: url = h.url_for( diff --git a/ckanext/textview/tests/test_view.py b/ckanext/textview/tests/test_view.py index 87cd7423606..9c6edaa3e8a 100644 --- a/ckanext/textview/tests/test_view.py +++ b/ckanext/textview/tests/test_view.py @@ -1,6 +1,7 @@ # encoding: utf-8 from ckan.common import config -import urlparse + +from six.moves.urllib.parse import urljoin import ckan.model as model import ckan.plugins as plugins @@ -51,7 +52,7 @@ def teardown_class(cls): model.repo.rebuild_db() def test_can_view(self): - url_same_domain = urlparse.urljoin( + url_same_domain = urljoin( config.get('ckan.site_url', '//localhost:5000'), '/resource.txt') url_different_domain = 'http://some.com/resource.txt' From 613888449a54bc47f187defcd532fd11d4362477 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 6 Sep 2019 15:05:00 +0200 Subject: [PATCH 5/7] [#4914] Support local licenses files --- ckan/model/license.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ckan/model/license.py b/ckan/model/license.py index 0266850ecdf..f70394c0c84 100644 --- a/ckan/model/license.py +++ b/ckan/model/license.py @@ -117,8 +117,12 @@ def __init__(self): def load_licenses(self, license_url): try: - response = requests.get(license_url) - license_data = response.json() + if license_url.startswith('file://'): + with open(license_url.replace('file://', ''), 'r') as f: + license_data = json.load(f) + else: + response = requests.get(license_url) + license_data = response.json() except requests.RequestException as e: msg = "Couldn't get the licenses file {}: {}".format(license_url, e) raise Exception(msg) From 8ca6e3b66a77520511ac83cadc7f6d149185d4c0 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 6 Sep 2019 15:06:29 +0200 Subject: [PATCH 6/7] [#4914] Fix bad references --- ckan/config/middleware/__init__.py | 4 ++-- ckan/lib/captcha.py | 4 +++- ckan/lib/search/__init__.py | 6 ++++-- ckanext/datapusher/logic/action.py | 6 +++--- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/ckan/config/middleware/__init__.py b/ckan/config/middleware/__init__.py index 433a9bc4505..92fc6e3d2a6 100644 --- a/ckan/config/middleware/__init__.py +++ b/ckan/config/middleware/__init__.py @@ -192,8 +192,8 @@ def __call__(self, environ, start_response): if app_name == 'flask_app': # This request will be served by Flask, but we still need the # Pylons URL builder (Routes) to work - parts = urlparse(config.get('ckan.site_url', - 'http://0.0.0.0:5000')) + parts = urlparse( + config.get('ckan.site_url', 'http://0.0.0.0:5000')) request_config = routes_request_config() request_config.host = str(parts.netloc + parts.path) request_config.protocol = str(parts.scheme) diff --git a/ckan/lib/captcha.py b/ckan/lib/captcha.py index 7a34d6ba212..aff2df033ac 100644 --- a/ckan/lib/captcha.py +++ b/ckan/lib/captcha.py @@ -13,7 +13,8 @@ def check_recaptcha(request): # Recaptcha not enabled return - client_ip_address = request.environ.get('REMOTE_ADDR', 'Unknown IP Address') + client_ip_address = request.environ.get( + 'REMOTE_ADDR', 'Unknown IP Address') # reCAPTCHA v2 recaptcha_response_field = request.form.get('g-recaptcha-response', '') @@ -37,5 +38,6 @@ def check_recaptcha(request): # Something weird with recaptcha response raise CaptchaError() + class CaptchaError(ValueError): pass diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 2520410bcc4..09967501573 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -315,8 +315,10 @@ def 
check_solr_schema_version(schema_file=None): version = tree.documentElement.getAttribute('version') if not len(version): - raise SearchError('Could not extract version info from the SOLR' - ' schema, using file: \n%s' % url) + msg = 'Could not extract version info from the SOLR schema' + if schema_file: + msg =+ ', using file {}'.format(schema_file) + raise SearchError(msg) if not version in SUPPORTED_SCHEMA_VERSIONS: raise SearchError('SOLR schema version not supported: %s. Supported' diff --git a/ckanext/datapusher/logic/action.py b/ckanext/datapusher/logic/action.py index 81545e67d4a..ce93fd24d28 100644 --- a/ckanext/datapusher/logic/action.py +++ b/ckanext/datapusher/logic/action.py @@ -65,7 +65,7 @@ def datapusher_submit(context, data_dict): callback_url_base = config.get('ckan.datapusher.callback_url_base') if callback_url_base: - callback_url = urlparse.urljoin( + callback_url = urljoin( callback_url_base.rstrip('/'), '/api/3/action/datapusher_hook') else: callback_url = h.url_for( @@ -123,7 +123,7 @@ def datapusher_submit(context, data_dict): try: r = requests.post( - urlparse.urljoin(datapusher_url, 'job'), + urljoin(datapusher_url, 'job'), headers={ 'Content-Type': 'application/json' }, @@ -289,7 +289,7 @@ def datapusher_status(context, data_dict): job_detail = None if job_id: - url = urlparse.urljoin(datapusher_url, 'job' + '/' + job_id) + url = urljoin(datapusher_url, 'job' + '/' + job_id) try: r = requests.get(url, headers={'Content-Type': 'application/json', 'Authorization': job_key}) From e2df68facbd21705f709593e5240599d309ee446 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 6 Sep 2019 15:23:18 +0200 Subject: [PATCH 7/7] [#4914] Fix operator It's late Friday and I'm tired --- ckan/lib/search/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 09967501573..9fa6db538f0 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -317,7 +317,7 @@ def check_solr_schema_version(schema_file=None): if not len(version): msg = 'Could not extract version info from the SOLR schema' if schema_file: - msg =+ ', using file {}'.format(schema_file) + msg += ', using file {}'.format(schema_file) raise SearchError(msg) if not version in SUPPORTED_SCHEMA_VERSIONS:
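[Editor's note, not part of the patch] Taken together, patches 2, 6 and 7 settle on one requests idiom wherever a remote service is called: requests.get() in place of urllib2.urlopen(), raise_for_status() in place of catching urllib2.HTTPError, and response.json()/response.text in place of json.load() and read() on a file-like handle. A minimal sketch of that idiom is below; the fetch_json helper name and the localhost Solr URL are placeholders for illustration, not code from this patch.

    import requests

    def fetch_json(url, auth_header=None, timeout=10):
        """Illustrative helper, not part of the CKAN code changed here."""
        headers = {'Authorization': auth_header} if auth_header else {}
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()   # requests.HTTPError on 4xx/5xx responses
        return response.json()        # ValueError if the body is not JSON

    try:
        data = fetch_json('http://localhost:8983/solr/ckan/schema/version')
    except requests.RequestException as exc:
        print("Request failed: {}".format(exc))
    except ValueError as exc:
        print("Couldn't parse the response as JSON: {}".format(exc))

Handling requests.RequestException and ValueError separately mirrors the split error messages introduced in ckan/model/license.py, where a connection failure and an unparseable licenses file produce different exceptions.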