From 515cf5d5465c2a99b2a5fc66235aa1525525e723 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 18 Apr 2018 15:18:54 +0300 Subject: [PATCH 01/99] package_autocomplete use solr --- ckan/config/solr/schema.xml | 18 +++++++++++++++++- ckan/lib/search/__init__.py | 2 +- ckan/logic/action/get.py | 35 +++++++++++++++++++---------------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/ckan/config/solr/schema.xml b/ckan/config/solr/schema.xml index 8e5018a2e2d..0f144de200b 100644 --- a/ckan/config/solr/schema.xml +++ b/ckan/config/solr/schema.xml @@ -24,7 +24,7 @@ - + @@ -81,6 +81,18 @@ schema. In this case the version should be set to the next CKAN version number. + + + + + + + + + + + + @@ -89,10 +101,12 @@ schema. In this case the version should be set to the next CKAN version number. + + @@ -165,6 +179,8 @@ schema. In this case the version should be set to the next CKAN version number. + + diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index dbc8ac12cc5..b3bb423c0ae 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -31,7 +31,7 @@ def text_traceback(): return res -SUPPORTED_SCHEMA_VERSIONS = ['2.8'] +SUPPORTED_SCHEMA_VERSIONS = ['2.8' ,'2.81'] DEFAULT_OPTIONS = { 'limit': 20, diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 51d73d4bebf..0be5c932568 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1496,34 +1496,37 @@ def package_autocomplete(context, data_dict): :rtype: list of dictionaries ''' - model = context['model'] - _check_access('package_autocomplete', context, data_dict) limit = data_dict.get('limit', 10) q = data_dict['q'] - like_q = u"%s%%" % q - - query = model.Session.query(model.Package) - query = query.filter(model.Package.state == 'active') - query = query.filter(model.Package.private == False) - query = query.filter(_or_(model.Package.name.ilike(like_q), - model.Package.title.ilike(like_q))) - query = query.limit(limit) + data_dict = { + 'fq': '+capacity:public', + 'q': ' OR '.join([ + 'name_ngram:{0}', + 'title_ngram:{0}', + 'name:{0}', + 'title:{0}', + ]).format(search.query.solr_literal(q)), + 'fl': ['name', 'title'], + 'rows': limit + } + query = search.query_for(model.Package) + results = query.run(data_dict)['results'] q_lower = q.lower() pkg_list = [] - for package in query: - if package.name.startswith(q_lower): + for package in results: + if q_lower in package['name']: match_field = 'name' - match_displayed = package.name + match_displayed = package['name'] else: match_field = 'title' - match_displayed = '%s (%s)' % (package.title, package.name) + match_displayed = '%s (%s)' % (package['title'], package['name']) result_dict = { - 'name': package.name, - 'title': package.title, + 'name': package['name'], + 'title': package['title'], 'match_field': match_field, 'match_displayed': match_displayed} pkg_list.append(result_dict) From 49611a61927a8a61e2efc51b102e5452ca74fcac Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Tue, 1 May 2018 11:02:00 +0300 Subject: [PATCH 02/99] updated schema version --- ckan/config/solr/schema.xml | 2 +- ckan/lib/search/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/config/solr/schema.xml b/ckan/config/solr/schema.xml index 0f144de200b..8055089fd18 100644 --- a/ckan/config/solr/schema.xml +++ b/ckan/config/solr/schema.xml @@ -24,7 +24,7 @@ - + diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index b3bb423c0ae..fe34325396a 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -31,7 +31,7 @@ def text_traceback(): return res -SUPPORTED_SCHEMA_VERSIONS = ['2.8' ,'2.81'] +SUPPORTED_SCHEMA_VERSIONS = ['2.8', '2.9'] DEFAULT_OPTIONS = { 'limit': 20, From 9edfe0c8f9a7cd5be032022f499ca3989bef9a5f Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Fri, 6 Jul 2018 16:02:11 +0300 Subject: [PATCH 03/99] apply permission labels --- ckan/logic/action/get.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 0be5c932568..c0b45cc29fe 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1497,10 +1497,19 @@ def package_autocomplete(context, data_dict): ''' _check_access('package_autocomplete', context, data_dict) + user = context.get('user') limit = data_dict.get('limit', 10) q = data_dict['q'] + # enforce permission filter based on user + if context.get('ignore_auth') or (user and authz.is_sysadmin(user)): + labels = None + else: + labels = lib_plugins.get_permission_labels().get_user_dataset_labels( + context['auth_user_obj'] + ) + data_dict = { 'fq': '+capacity:public', 'q': ' OR '.join([ @@ -1513,7 +1522,8 @@ def package_autocomplete(context, data_dict): 'rows': limit } query = search.query_for(model.Package) - results = query.run(data_dict)['results'] + + results = query.run(data_dict, permission_labels=labels)['results'] q_lower = q.lower() pkg_list = [] From 110e43d3637b81977d01470ea7bacc78540db226 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 3 Aug 2018 19:54:48 +0200 Subject: [PATCH 04/99] [#4382] strip full URL before saving to DB --- ckan/lib/dictization/model_save.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ckan/lib/dictization/model_save.py b/ckan/lib/dictization/model_save.py index b0abf1abcea..d9670030fa4 100644 --- a/ckan/lib/dictization/model_save.py +++ b/ckan/lib/dictization/model_save.py @@ -13,7 +13,9 @@ log = logging.getLogger(__name__) + def resource_dict_save(res_dict, context): + model = context["model"] session = context["session"] @@ -30,6 +32,11 @@ def resource_dict_save(res_dict, context): table = class_mapper(model.Resource).mapped_table fields = [field.name for field in table.c] + # Strip the full url for resources of type 'upload' + if res_dict.get('url_type') == u'upload': + url = res_dict.get('url') + res_dict[u'url'] = url[url.rfind(u"/")+1:] + # Resource extras not submitted will be removed from the existing extras # dict new_extras = {} From 8210da1d2ab631760ec44053b0a949eb11f718f9 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 6 Aug 2018 10:23:43 +0200 Subject: [PATCH 05/99] Changes according revieew --- ckan/lib/dictization/model_save.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ckan/lib/dictization/model_save.py b/ckan/lib/dictization/model_save.py index d9670030fa4..454159f41df 100644 --- a/ckan/lib/dictization/model_save.py +++ b/ckan/lib/dictization/model_save.py @@ -33,9 +33,8 @@ def resource_dict_save(res_dict, context): fields = [field.name for field in table.c] # Strip the full url for resources of type 'upload' - if res_dict.get('url_type') == u'upload': - url = res_dict.get('url') - res_dict[u'url'] = url[url.rfind(u"/")+1:] + if res_dict.get('url') and res_dict.get('url_type') == u'upload': + res_dict['url'] = res_dict['url'].rsplit('/')[-1] # Resource extras not submitted will be removed from the existing extras # dict From 469d068a01a33960f263142ea671aefed031547b Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 6 Aug 2018 14:32:47 +0200 Subject: [PATCH 06/99] Add tests --- .../lib/dictization/test_model_dictize.py | 39 +++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/ckan/tests/lib/dictization/test_model_dictize.py b/ckan/tests/lib/dictization/test_model_dictize.py index 43de44788c1..01d8623c7ce 100644 --- a/ckan/tests/lib/dictization/test_model_dictize.py +++ b/ckan/tests/lib/dictization/test_model_dictize.py @@ -5,7 +5,7 @@ from nose.tools import assert_equal -from ckan.lib.dictization import model_dictize +from ckan.lib.dictization import model_dictize, model_save from ckan import model from ckan.lib import search @@ -401,8 +401,7 @@ def test_package_dictize_resource(self): result = model_dictize.package_dictize(dataset_obj, context) - assert_equal_for_keys(result['resources'][0], resource, - 'name', 'url') + assert_equal_for_keys(result['resources'][0], resource, 'name', 'url') expected_dict = { u'cache_last_updated': None, u'cache_url': None, @@ -422,6 +421,40 @@ def test_package_dictize_resource(self): } self.assert_equals_expected(expected_dict, result['resources'][0]) + def test_package_dictize_resource_upload_and_striped(self): + dataset = factories.Dataset() + resource = factories.Resource(package=dataset['id'], + name='test_pkg_dictize', + url_type='upload', + url='some_filename.csv') + + context = {'model': model, 'session': model.Session} + + result = model_save.resource_dict_save(resource, context) + + expected_dict = { + u'url': u'some_filename.csv', + u'url_type': u'upload' + } + assert expected_dict['url'] == result.url + + def test_package_dictize_resource_upload_with_url_and_striped(self): + dataset = factories.Dataset() + resource = factories.Resource(package=dataset['id'], + name='test_pkg_dictize', + url_type='upload', + url='http://some_filename.csv') + + context = {'model': model, 'session': model.Session} + + result = model_save.resource_dict_save(resource, context) + + expected_dict = { + u'url': u'some_filename.csv', + u'url_type': u'upload' + } + assert expected_dict['url'] == result.url + def test_package_dictize_tags(self): dataset = factories.Dataset(tags=[{'name': 'fish'}]) dataset_obj = model.Package.get(dataset['id']) From c6c252622b5c3c091a38159898db6e6f71dbfbb6 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Thu, 16 Aug 2018 11:01:51 +0200 Subject: [PATCH 07/99] Setup --- ckan/lib/flask_cli.py | 19 +++++++++++++++++++ setup.py | 1 + 2 files changed, 20 insertions(+) create mode 100644 ckan/lib/flask_cli.py diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py new file mode 100644 index 00000000000..270b2f8a957 --- /dev/null +++ b/ckan/lib/flask_cli.py @@ -0,0 +1,19 @@ +# encoding: utf-8 + +import os +import click +from flask import Flask, current_app +from flask.cli import FlaskGroup, with_appcontext + +from ckan.common import config +from ckan.lib.cli import (click_config_option, load_config, parse_db_config, + paster_click_group) + +import pdb; pdb.set_trace() +os.environ['FLASK_RUN_FROM_CLI'] = 'true' + +@click.help_option(u'-h', u'--help') +@click_config_option +def run(): + #load_config(config or ctx.obj['config']) + click.echo('Starting CKAN') diff --git a/setup.py b/setup.py index eaad14616ef..cc0abc13622 100644 --- a/setup.py +++ b/setup.py @@ -77,6 +77,7 @@ def parse_version(s): 'jobs = ckan.lib.cli:JobsCommand', ], 'console_scripts': [ + 'ckan = ckan.lib.flask_cli:run', 'ckan-admin = bin.ckan_admin:Command', ], 'paste.paster_create_template': [ From d1940a85b2fa24a48aa1094aa71b733bdc450995 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 20 Aug 2018 15:06:49 +0200 Subject: [PATCH 08/99] Create flask_cli script --- ckan/lib/flask_cli.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index 270b2f8a957..6fb8336128b 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -1,19 +1,35 @@ # encoding: utf-8 import os + import click from flask import Flask, current_app from flask.cli import FlaskGroup, with_appcontext +from werkzeug.serving import run_simple from ckan.common import config +from ckan.config.middleware import make_app from ckan.lib.cli import (click_config_option, load_config, parse_db_config, paster_click_group) -import pdb; pdb.set_trace() -os.environ['FLASK_RUN_FROM_CLI'] = 'true' +# os.environ['FLASK_RUN_FROM_CLI'] = 'true' + + +def _load_config(config=None): + + # app = make_app + # app.config.from_file + # return app + pass + + +def _helper(): + print('nothing') + @click.help_option(u'-h', u'--help') @click_config_option def run(): - #load_config(config or ctx.obj['config']) + # app = _load_config(config) click.echo('Starting CKAN') + run_simple('localhost', 5000, make_app, use_reloader=True, use_evalex=True) From 4ef766fe52de68e02a4ed23b488ecac8f0a23dea Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 21 Aug 2018 14:44:03 +0200 Subject: [PATCH 09/99] Run command working --- ckan/config/middleware/__init__.py | 1 + ckan/lib/flask_cli.py | 53 +++++++++++++++++++++++------- setup.py | 2 +- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/ckan/config/middleware/__init__.py b/ckan/config/middleware/__init__.py index 73af42fa2f4..ca5c9b2f70c 100644 --- a/ckan/config/middleware/__init__.py +++ b/ckan/config/middleware/__init__.py @@ -53,6 +53,7 @@ def make_app(conf, full_stack=True, static_files=True, **app_conf): middleware. ''' + import pdb; pdb.set_trace() load_environment(conf, app_conf) pylons_app = make_pylons_stack(conf, full_stack, static_files, diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index 6fb8336128b..9795733d48a 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -4,7 +4,7 @@ import click from flask import Flask, current_app -from flask.cli import FlaskGroup, with_appcontext +from flask.cli import AppGroup, with_appcontext from werkzeug.serving import run_simple from ckan.common import config @@ -17,19 +17,50 @@ def _load_config(config=None): - # app = make_app - # app.config.from_file - # return app - pass + from paste.deploy import appconfig + from paste.script.util.logging_config import fileConfig + + if config: + filename = os.path.abspath(config) + config_source = '-c parameter' + elif os.environ.get('CKAN_INI'): + filename = os.environ.get('CKAN_INI') + config_source = '$CKAN_INI' + else: + default_filename = 'development.ini' + filename = os.path.join(os.getcwd(), default_filename) + if not os.path.exists(filename): + # give really clear error message for this common situation + msg = 'ERROR: You need to specify the CKAN config (.ini) '\ + 'file path.'\ + '\nUse the --config parameter or set environment ' \ + 'variable CKAN_INI or have {}\nin the current directory.' \ + .format(default_filename) + exit(msg) + + if not os.path.exists(filename): + msg = 'Config file not found: %s' % filename + msg += '\n(Given by: %s)' % config_source + exit(msg) + fileConfig(filename) + return appconfig('config:' + filename) -def _helper(): - print('nothing') + +@click.group() +@click.help_option(u'-h', u'--help') +@click.pass_context +def main(*args, **kwargs): + pass @click.help_option(u'-h', u'--help') +@main.command(u'run', short_help=u'Start development server') @click_config_option -def run(): - # app = _load_config(config) - click.echo('Starting CKAN') - run_simple('localhost', 5000, make_app, use_reloader=True, use_evalex=True) +@click.option(u'-p', u'--port', default=5000, help=u'Set port') +@click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') +def run(config, port, reloader): + # click.echo(u'Starting CKAN') + conf = _load_config(config) + app = make_app(conf.global_conf, **conf.local_conf) + run_simple(u'localhost', port, app, use_reloader=reloader, use_evalex=True) diff --git a/setup.py b/setup.py index cc0abc13622..3a50ec426f4 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,7 @@ def parse_version(s): 'jobs = ckan.lib.cli:JobsCommand', ], 'console_scripts': [ - 'ckan = ckan.lib.flask_cli:run', + 'ckan = ckan.lib.flask_cli:main', 'ckan-admin = bin.ckan_admin:Command', ], 'paste.paster_create_template': [ From 0ced79463907dd466e9892127e7def64f1c6fc16 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 21 Aug 2018 14:47:59 +0200 Subject: [PATCH 10/99] Add Debugger app, remove pdb line --- ckan/config/middleware/__init__.py | 1 - ckan/config/middleware/flask_app.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ckan/config/middleware/__init__.py b/ckan/config/middleware/__init__.py index ca5c9b2f70c..73af42fa2f4 100644 --- a/ckan/config/middleware/__init__.py +++ b/ckan/config/middleware/__init__.py @@ -53,7 +53,6 @@ def make_app(conf, full_stack=True, static_files=True, **app_conf): middleware. ''' - import pdb; pdb.set_trace() load_environment(conf, app_conf) pylons_app = make_pylons_stack(conf, full_stack, static_files, diff --git a/ckan/config/middleware/flask_app.py b/ckan/config/middleware/flask_app.py index 7edb1de929d..7aaf89fb85b 100644 --- a/ckan/config/middleware/flask_app.py +++ b/ckan/config/middleware/flask_app.py @@ -107,6 +107,10 @@ def make_flask_stack(conf, **app_conf): app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False DebugToolbarExtension(app) + from werkzeug.debug import DebuggedApplication + app = DebuggedApplication(app, True) + app = app.app + # Use Beaker as the Flask session interface class BeakerSessionInterface(SessionInterface): def open_session(self, app, request): From 6faafd44bb42f9a05c5ef7715b82cd4a2dbf7b10 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Wed, 22 Aug 2018 13:28:54 +0200 Subject: [PATCH 11/99] pep8 --- ckan/lib/flask_cli.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index 9795733d48a..a15307ce7e9 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -22,45 +22,47 @@ def _load_config(config=None): if config: filename = os.path.abspath(config) - config_source = '-c parameter' - elif os.environ.get('CKAN_INI'): - filename = os.environ.get('CKAN_INI') - config_source = '$CKAN_INI' + config_source = u'-c parameter' + elif os.environ.get(u'CKAN_INI'): + filename = os.environ.get(u'CKAN_INI') + config_source = u'$CKAN_INI' else: - default_filename = 'development.ini' + default_filename = u'development.ini' filename = os.path.join(os.getcwd(), default_filename) if not os.path.exists(filename): # give really clear error message for this common situation - msg = 'ERROR: You need to specify the CKAN config (.ini) '\ - 'file path.'\ - '\nUse the --config parameter or set environment ' \ - 'variable CKAN_INI or have {}\nin the current directory.' \ + msg = u'ERROR: You need to specify the CKAN config (.ini) '\ + u'file path.'\ + u'\nUse the --config parameter or set environment ' \ + u'variable CKAN_INI or have {}\nin the current directory.' \ .format(default_filename) exit(msg) if not os.path.exists(filename): - msg = 'Config file not found: %s' % filename - msg += '\n(Given by: %s)' % config_source + msg = u'Config file not found: %s' % filename + msg += u'\n(Given by: %s)' % config_source exit(msg) fileConfig(filename) - return appconfig('config:' + filename) + return appconfig(u'config:' + filename) @click.group() @click.help_option(u'-h', u'--help') @click.pass_context -def main(*args, **kwargs): +def main(ctx, *args, **kwargs): + # ctx.obj['CUSTOM_CKAN_DEV_SERVER'] = True pass -@click.help_option(u'-h', u'--help') @main.command(u'run', short_help=u'Start development server') +@click.help_option(u'-h', u'--help') @click_config_option +@click.option(u'-H', u'--host', default=u'localhost', help=u'Set host') @click.option(u'-p', u'--port', default=5000, help=u'Set port') @click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') -def run(config, port, reloader): - # click.echo(u'Starting CKAN') +def run(config, host, port, reloader): + '''Runs development server''' conf = _load_config(config) app = make_app(conf.global_conf, **conf.local_conf) - run_simple(u'localhost', port, app, use_reloader=reloader, use_evalex=True) + run_simple(host, port, app, use_reloader=reloader, use_evalex=True) From f525ba9c747db165f812a0b2f13f77c2d977ce8e Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 24 Aug 2018 13:59:43 +0200 Subject: [PATCH 12/99] Add DB command --- ckan/lib/flask_cli.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index a15307ce7e9..eecb33f2143 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -8,6 +8,8 @@ from werkzeug.serving import run_simple from ckan.common import config +from ckan.config.environment import load_environment + from ckan.config.middleware import make_app from ckan.lib.cli import (click_config_option, load_config, parse_db_config, paster_click_group) @@ -49,9 +51,7 @@ def _load_config(config=None): @click.group() @click.help_option(u'-h', u'--help') -@click.pass_context -def main(ctx, *args, **kwargs): - # ctx.obj['CUSTOM_CKAN_DEV_SERVER'] = True +def main(*args, **kwargs): pass @@ -66,3 +66,19 @@ def run(config, host, port, reloader): conf = _load_config(config) app = make_app(conf.global_conf, **conf.local_conf) run_simple(host, port, app, use_reloader=reloader, use_evalex=True) + + +@main.command(u'db', short_help=u'Initialize the database') +@click.help_option(u'-h', u'--help') +@click_config_option +@click.argument(u'init') +def initdb(config, init): + '''Initialising the database''' + conf = _load_config(config) + # app = make_app(conf.global_conf, **conf.local_conf) + try: + import ckan.model as model + model.repo.init_db() + except Exception as e: + print e + print('Initialising DB: SUCCESS') From 7cfba2b48ee657b23f44d1ace25cfa84adae1183 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 24 Aug 2018 14:26:33 +0200 Subject: [PATCH 13/99] Load env instead of creating the app --- ckan/lib/flask_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index eecb33f2143..9f1f0863864 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -75,7 +75,7 @@ def run(config, host, port, reloader): def initdb(config, init): '''Initialising the database''' conf = _load_config(config) - # app = make_app(conf.global_conf, **conf.local_conf) + load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.init_db() From 9a3fff89e24733bd3230e32db2d76008108cbc34 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 24 Aug 2018 14:27:38 +0200 Subject: [PATCH 14/99] Prefix strings --- ckan/lib/flask_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index 9f1f0863864..87138f97cc8 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -62,7 +62,7 @@ def main(*args, **kwargs): @click.option(u'-p', u'--port', default=5000, help=u'Set port') @click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') def run(config, host, port, reloader): - '''Runs development server''' + u'''Runs development server''' conf = _load_config(config) app = make_app(conf.global_conf, **conf.local_conf) run_simple(host, port, app, use_reloader=reloader, use_evalex=True) @@ -73,7 +73,7 @@ def run(config, host, port, reloader): @click_config_option @click.argument(u'init') def initdb(config, init): - '''Initialising the database''' + u'''Initialising the database''' conf = _load_config(config) load_environment(conf.global_conf, conf.local_conf) try: @@ -81,4 +81,4 @@ def initdb(config, init): model.repo.init_db() except Exception as e: print e - print('Initialising DB: SUCCESS') + print(u'Initialising DB: SUCCESS') From 052c4b0b019f43fdcc257fbd7663181fccf64af1 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 24 Aug 2018 15:03:49 +0200 Subject: [PATCH 15/99] python3 compliance --- ckan/lib/flask_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py index 87138f97cc8..f87b99dfa75 100644 --- a/ckan/lib/flask_cli.py +++ b/ckan/lib/flask_cli.py @@ -80,5 +80,5 @@ def initdb(config, init): import ckan.model as model model.repo.init_db() except Exception as e: - print e + print(e) print(u'Initialising DB: SUCCESS') From 27160c1051396f6627d4474324762a2912616704 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 3 Sep 2018 19:16:40 +0200 Subject: [PATCH 16/99] Rearange the cli commands, into one folder, separated by subcommands --- ckan/cli/__init__.py | 52 +++++++++++++++++++++++++++++++++++++ ckan/cli/cli.py | 15 +++++++++++ ckan/cli/server/__init__.py | 0 ckan/cli/server/server.py | 27 +++++++++++++++++++ setup.py | 2 +- 5 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 ckan/cli/__init__.py create mode 100644 ckan/cli/cli.py create mode 100644 ckan/cli/server/__init__.py create mode 100644 ckan/cli/server/server.py diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py new file mode 100644 index 00000000000..2558e37d1fb --- /dev/null +++ b/ckan/cli/__init__.py @@ -0,0 +1,52 @@ +# encoding: utf-8 + +import os + +import click +from flask import Flask, current_app +from flask.cli import AppGroup, with_appcontext +from werkzeug.serving import run_simple + +from ckan.common import config +from ckan.config.environment import load_environment +from ckan.config.middleware import make_app + + +click_config_option = click.option( + '-c', + '--config', + default=None, + metavar='CONFIG', + help=u'Config file to use (default: development.ini)') + + +def load_config(config=None): + + from paste.deploy import appconfig + from paste.script.util.logging_config import fileConfig + + if config: + filename = os.path.abspath(config) + config_source = u'-c parameter' + elif os.environ.get(u'CKAN_INI'): + filename = os.environ.get(u'CKAN_INI') + config_source = u'$CKAN_INI' + else: + default_filename = u'development.ini' + filename = os.path.join(os.getcwd(), default_filename) + if not os.path.exists(filename): + # give really clear error message for this common situation + msg = u'ERROR: You need to specify the CKAN config (.ini) '\ + u'file path.'\ + u'\nUse the --config parameter or set environment ' \ + u'variable CKAN_INI or have {}\nin the current directory.' \ + .format(default_filename) + exit(msg) + + if not os.path.exists(filename): + msg = u'Config file not found: %s' % filename + msg += u'\n(Given by: %s)' % config_source + exit(msg) + + fileConfig(filename) + return appconfig(u'config:' + filename) diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py new file mode 100644 index 00000000000..9b2f4383c50 --- /dev/null +++ b/ckan/cli/cli.py @@ -0,0 +1,15 @@ +# encoding: utf-8 + +import os + +import click + + +@click.group() +@click.help_option(u'-h', u'--help') +def ckan(*args, **kwargs): + pass + + +from ckan.cli.server.server import run +ckan.add_command(run) diff --git a/ckan/cli/server/__init__.py b/ckan/cli/server/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ckan/cli/server/server.py b/ckan/cli/server/server.py new file mode 100644 index 00000000000..79f23c7a383 --- /dev/null +++ b/ckan/cli/server/server.py @@ -0,0 +1,27 @@ +# encoding: utf-8 + +import os + +import click +from flask import Flask, current_app +from flask.cli import AppGroup, with_appcontext +from werkzeug.serving import run_simple + +from ckan.common import config +from ckan.config.environment import load_environment + +from ckan.config.middleware import make_app +from ckan.cli import load_config, click_config_option +from ckan.cli.cli import ckan + +@ckan.command(u'run', short_help=u'Start development server') +@click.help_option(u'-h', u'--help') +@click_config_option +@click.option(u'-H', u'--host', default=u'localhost', help=u'Set host') +@click.option(u'-p', u'--port', default=5000, help=u'Set port') +@click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') +def run(config, host, port, reloader): + u'''Runs development server''' + conf = load_config(config) + app = make_app(conf.global_conf, **conf.local_conf) + run_simple(host, port, app, use_reloader=reloader, use_evalex=True) diff --git a/setup.py b/setup.py index 3a50ec426f4..4d949966085 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,7 @@ def parse_version(s): 'jobs = ckan.lib.cli:JobsCommand', ], 'console_scripts': [ - 'ckan = ckan.lib.flask_cli:main', + 'ckan = ckan.cli.cli:ckan', 'ckan-admin = bin.ckan_admin:Command', ], 'paste.paster_create_template': [ From b61b823b43643ec0bd4dcb418df27f6c64d73b4c Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 4 Sep 2018 09:48:04 +0200 Subject: [PATCH 17/99] Add database commands, refactor the code --- ckan/cli/__init__.py | 4 +++- ckan/cli/cli.py | 5 +++-- ckan/cli/database/__init__.py | 0 ckan/cli/database/db.py | 26 ++++++++++++++++++++++++++ ckan/cli/server/server.py | 8 ++------ 5 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 ckan/cli/database/__init__.py create mode 100644 ckan/cli/database/db.py diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 2558e37d1fb..812af7701e7 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -20,7 +20,7 @@ help=u'Config file to use (default: development.ini)') -def load_config(config=None): +def _load_config(config=None): from paste.deploy import appconfig from paste.script.util.logging_config import fileConfig @@ -50,3 +50,5 @@ def load_config(config=None): fileConfig(filename) return appconfig(u'config:' + filename) + +load_config = _load_config diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index 9b2f4383c50..2de5d62fbde 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -3,6 +3,8 @@ import os import click +from ckan.cli.server.server import run +from ckan.cli.database.db import initdb @click.group() @@ -10,6 +12,5 @@ def ckan(*args, **kwargs): pass - -from ckan.cli.server.server import run ckan.add_command(run) +ckan.add_command(initdb) diff --git a/ckan/cli/database/__init__.py b/ckan/cli/database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ckan/cli/database/db.py b/ckan/cli/database/db.py new file mode 100644 index 00000000000..9a0332cbc24 --- /dev/null +++ b/ckan/cli/database/db.py @@ -0,0 +1,26 @@ +# encoding: utf-8 + +import os + +import click +from flask import Flask, current_app +from werkzeug.serving import run_simple + +from ckan.config.middleware import make_app +from ckan.cli import load_config, click_config_option + + +@click.command(u'db', short_help=u'Initialize the database') +@click.help_option(u'-h', u'--help') +@click_config_option +@click.argument(u'init') +def initdb(config, init): + u'''Initialising the database''' + conf = _load_config(config) + load_environment(conf.global_conf, conf.local_conf) + try: + import ckan.model as model + model.repo.init_db() + except Exception as e: + print(e) + print(u'Initialising DB: SUCCESS') diff --git a/ckan/cli/server/server.py b/ckan/cli/server/server.py index 79f23c7a383..2852810fc35 100644 --- a/ckan/cli/server/server.py +++ b/ckan/cli/server/server.py @@ -4,17 +4,13 @@ import click from flask import Flask, current_app -from flask.cli import AppGroup, with_appcontext from werkzeug.serving import run_simple -from ckan.common import config -from ckan.config.environment import load_environment - from ckan.config.middleware import make_app from ckan.cli import load_config, click_config_option -from ckan.cli.cli import ckan -@ckan.command(u'run', short_help=u'Start development server') + +@click.command(u'run', short_help=u'Start development server') @click.help_option(u'-h', u'--help') @click_config_option @click.option(u'-H', u'--host', default=u'localhost', help=u'Set host') From e51a1e2c04d1baaa8745a76b8ddb2e3ca467c5d1 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 4 Sep 2018 13:31:13 +0200 Subject: [PATCH 18/99] Update the database commands --- ckan/cli/__init__.py | 9 +++++--- ckan/cli/cli.py | 4 ++-- ckan/cli/database/db.py | 47 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 812af7701e7..1af7a95879f 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -3,6 +3,7 @@ import os import click +import logging from flask import Flask, current_app from flask.cli import AppGroup, with_appcontext from werkzeug.serving import run_simple @@ -11,12 +12,13 @@ from ckan.config.environment import load_environment from ckan.config.middleware import make_app +log = logging.getLogger(__name__) click_config_option = click.option( - '-c', - '--config', + u'-c', + u'--config', default=None, - metavar='CONFIG', + metavar=u'CONFIG', help=u'Config file to use (default: development.ini)') @@ -49,6 +51,7 @@ def _load_config(config=None): exit(msg) fileConfig(filename) + log.debug("Using " + filename) return appconfig(u'config:' + filename) load_config = _load_config diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index 2de5d62fbde..cdcf28b58c3 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -4,7 +4,7 @@ import click from ckan.cli.server.server import run -from ckan.cli.database.db import initdb +from ckan.cli.database.db import db @click.group() @@ -13,4 +13,4 @@ def ckan(*args, **kwargs): pass ckan.add_command(run) -ckan.add_command(initdb) +ckan.add_command(db) diff --git a/ckan/cli/database/db.py b/ckan/cli/database/db.py index 9a0332cbc24..d72c8f21949 100644 --- a/ckan/cli/database/db.py +++ b/ckan/cli/database/db.py @@ -4,19 +4,26 @@ import click from flask import Flask, current_app + from werkzeug.serving import run_simple +from ckan.cli import click_config_option, load_config +from ckan.config.environment import load_environment from ckan.config.middleware import make_app -from ckan.cli import load_config, click_config_option -@click.command(u'db', short_help=u'Initialize the database') +@click.group(name=u'db', short_help=u'Database commands') +@click.help_option(u'-h', u'--help') +def db(): + pass + + +@db.command(u'init', short_help=u'Initialaze the database') @click.help_option(u'-h', u'--help') @click_config_option -@click.argument(u'init') -def initdb(config, init): +def initdb(config): u'''Initialising the database''' - conf = _load_config(config) + conf = load_config(config) load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model @@ -24,3 +31,33 @@ def initdb(config, init): except Exception as e: print(e) print(u'Initialising DB: SUCCESS') + + +@db.command(u'clean', short_help=u'Clean the database') +@click.help_option(u'-h', u'--help') +@click_config_option +def initdb(config): + u'''Cleaning the database''' + conf = load_config(config) + load_environment(conf.global_conf, conf.local_conf) + try: + import ckan.model as model + model.repo.clean_db() + except Exception as e: + print(e) + print(u'Cleaning DB: SUCCESS') + + +@db.command(u'upgrade', short_help=u'Upgrade the database') +@click.help_option(u'-h', u'--help') +@click_config_option +def initdb(config): + u'''Upgrading the database''' + conf = load_config(config) + load_environment(conf.global_conf, conf.local_conf) + try: + import ckan.model as model + model.repo.clean_db() + except Exception as e: + print(e) + print(u'Upgrading DB: SUCCESS') From 3af4b0be84ed554f8db423dc396683bf0f97a106 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 4 Sep 2018 13:49:17 +0200 Subject: [PATCH 19/99] pep8 and typo --- ckan/cli/__init__.py | 4 +++- ckan/cli/database/db.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 1af7a95879f..4d9b4855411 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -12,8 +12,10 @@ from ckan.config.environment import load_environment from ckan.config.middleware import make_app + log = logging.getLogger(__name__) + click_config_option = click.option( u'-c', u'--config', @@ -51,7 +53,7 @@ def _load_config(config=None): exit(msg) fileConfig(filename) - log.debug("Using " + filename) + log.debug(u"Using " + filename) return appconfig(u'config:' + filename) load_config = _load_config diff --git a/ckan/cli/database/db.py b/ckan/cli/database/db.py index d72c8f21949..868d3d0711d 100644 --- a/ckan/cli/database/db.py +++ b/ckan/cli/database/db.py @@ -57,7 +57,7 @@ def initdb(config): load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model - model.repo.clean_db() + model.repo.upgrade_db() except Exception as e: print(e) print(u'Upgrading DB: SUCCESS') From 04b3c0d58227e3d04fb00aa81e4dd85b1b30e1e9 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 4 Sep 2018 14:02:08 +0200 Subject: [PATCH 20/99] Typos, add confirmation prompt for cleaning the DB --- ckan/cli/database/db.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ckan/cli/database/db.py b/ckan/cli/database/db.py index 868d3d0711d..60196fe4598 100644 --- a/ckan/cli/database/db.py +++ b/ckan/cli/database/db.py @@ -36,7 +36,8 @@ def initdb(config): @db.command(u'clean', short_help=u'Clean the database') @click.help_option(u'-h', u'--help') @click_config_option -def initdb(config): +@click.confirmation_option(prompt=u'This will delete all the data! Do you want to continue?') +def cleandb(config): u'''Cleaning the database''' conf = load_config(config) load_environment(conf.global_conf, conf.local_conf) @@ -51,7 +52,7 @@ def initdb(config): @db.command(u'upgrade', short_help=u'Upgrade the database') @click.help_option(u'-h', u'--help') @click_config_option -def initdb(config): +def updatedb(config): u'''Upgrading the database''' conf = load_config(config) load_environment(conf.global_conf, conf.local_conf) From 2fc91eb9cc755d87b78965d3086ac001b47234cf Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 4 Sep 2018 14:29:19 +0200 Subject: [PATCH 21/99] Move all command scripts in same folder --- ckan/cli/__init__.py | 1 + ckan/cli/cli.py | 8 ++++---- ckan/cli/database/__init__.py | 0 ckan/cli/{database => }/db.py | 4 ++-- ckan/cli/{server => }/server.py | 0 ckan/cli/server/__init__.py | 0 6 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 ckan/cli/database/__init__.py rename ckan/cli/{database => }/db.py (93%) rename ckan/cli/{server => }/server.py (100%) delete mode 100644 ckan/cli/server/__init__.py diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 4d9b4855411..20f760c30ee 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -56,4 +56,5 @@ def _load_config(config=None): log.debug(u"Using " + filename) return appconfig(u'config:' + filename) + load_config = _load_config diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index cdcf28b58c3..1bbdbeef8cd 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -3,8 +3,7 @@ import os import click -from ckan.cli.server.server import run -from ckan.cli.database.db import db +from ckan.cli import db, server @click.group() @@ -12,5 +11,6 @@ def ckan(*args, **kwargs): pass -ckan.add_command(run) -ckan.add_command(db) + +ckan.add_command(server.run) +ckan.add_command(db.db) diff --git a/ckan/cli/database/__init__.py b/ckan/cli/database/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ckan/cli/database/db.py b/ckan/cli/db.py similarity index 93% rename from ckan/cli/database/db.py rename to ckan/cli/db.py index 60196fe4598..27c95712bb2 100644 --- a/ckan/cli/database/db.py +++ b/ckan/cli/db.py @@ -32,11 +32,11 @@ def initdb(config): print(e) print(u'Initialising DB: SUCCESS') - +prompt_msg =u'This will delete all of your data!\nDo you want to continue?' @db.command(u'clean', short_help=u'Clean the database') @click.help_option(u'-h', u'--help') @click_config_option -@click.confirmation_option(prompt=u'This will delete all the data! Do you want to continue?') +@click.confirmation_option(prompt=prompt_msg) def cleandb(config): u'''Cleaning the database''' conf = load_config(config) diff --git a/ckan/cli/server/server.py b/ckan/cli/server.py similarity index 100% rename from ckan/cli/server/server.py rename to ckan/cli/server.py diff --git a/ckan/cli/server/__init__.py b/ckan/cli/server/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 From 390ae6a5a0709df2a199588207fccd13aa5477e5 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Thu, 6 Sep 2018 13:29:18 +0200 Subject: [PATCH 22/99] Add search index commands --- ckan/cli/__init__.py | 9 +++------ ckan/cli/cli.py | 3 ++- ckan/cli/db.py | 22 +++++++++++----------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 20f760c30ee..799258f1d0b 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -24,7 +24,7 @@ help=u'Config file to use (default: development.ini)') -def _load_config(config=None): +def load_config(config=None): from paste.deploy import appconfig from paste.script.util.logging_config import fileConfig @@ -48,13 +48,10 @@ def _load_config(config=None): exit(msg) if not os.path.exists(filename): - msg = u'Config file not found: %s' % filename - msg += u'\n(Given by: %s)' % config_source + msg = 'Config file not found: %s' % filename + msg += '\n(Given by: %s)' % config_source exit(msg) fileConfig(filename) log.debug(u"Using " + filename) return appconfig(u'config:' + filename) - - -load_config = _load_config diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index 1bbdbeef8cd..b7aef2a0f68 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -3,7 +3,7 @@ import os import click -from ckan.cli import db, server +from ckan.cli import db, server, search_index @click.group() @@ -14,3 +14,4 @@ def ckan(*args, **kwargs): ckan.add_command(server.run) ckan.add_command(db.db) +#ckan.add_command(search_index.search) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 27c95712bb2..2a6a3a30899 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -4,12 +4,9 @@ import click from flask import Flask, current_app - from werkzeug.serving import run_simple -from ckan.cli import click_config_option, load_config -from ckan.config.environment import load_environment -from ckan.config.middleware import make_app +from ckan.cli import click_config_option @click.group(name=u'db', short_help=u'Database commands') @@ -23,8 +20,8 @@ def db(): @click_config_option def initdb(config): u'''Initialising the database''' - conf = load_config(config) - load_environment(conf.global_conf, conf.local_conf) + # conf = load_config(config) + # load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.init_db() @@ -32,15 +29,18 @@ def initdb(config): print(e) print(u'Initialising DB: SUCCESS') -prompt_msg =u'This will delete all of your data!\nDo you want to continue?' + +prompt_msg = u'This will delete all of your data!\nDo you want to continue?' + + @db.command(u'clean', short_help=u'Clean the database') @click.help_option(u'-h', u'--help') @click_config_option @click.confirmation_option(prompt=prompt_msg) def cleandb(config): u'''Cleaning the database''' - conf = load_config(config) - load_environment(conf.global_conf, conf.local_conf) + # conf = load_config(config) + # load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.clean_db() @@ -54,8 +54,8 @@ def cleandb(config): @click_config_option def updatedb(config): u'''Upgrading the database''' - conf = load_config(config) - load_environment(conf.global_conf, conf.local_conf) + # conf = load_config(config) + # load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.upgrade_db() From f52b2ad0e4df1127716aeaad5c5bae08161c2004 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 7 Sep 2018 14:14:39 +0200 Subject: [PATCH 23/99] Small refactoring of the code --- ckan/cli/cli.py | 19 +++++++++++++++---- ckan/cli/db.py | 15 +++------------ ckan/cli/search_index.py | 27 +++++++++++++++++++++++++++ ckan/cli/server.py | 9 +++------ 4 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 ckan/cli/search_index.py diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index b7aef2a0f68..774e6c2ac4a 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -3,15 +3,26 @@ import os import click -from ckan.cli import db, server, search_index + +from ckan.cli import db, load_config, click_config_option, search_index, server +from ckan.config.middleware import make_app + + +class CkanCommand(object): + + def __init__(self, config=None): + self.config = load_config(config) + self.app = make_app(self.config.global_conf, **self.config.local_conf) @click.group() @click.help_option(u'-h', u'--help') -def ckan(*args, **kwargs): - pass +@click_config_option +@click.pass_context +def ckan(ctx, config, *args, **kwargs): + ctx.obj = CkanCommand(config) ckan.add_command(server.run) ckan.add_command(db.db) -#ckan.add_command(search_index.search) +ckan.add_command(search_index.search_index) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 2a6a3a30899..32b42910384 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -17,11 +17,8 @@ def db(): @db.command(u'init', short_help=u'Initialaze the database') @click.help_option(u'-h', u'--help') -@click_config_option -def initdb(config): +def initdb(): u'''Initialising the database''' - # conf = load_config(config) - # load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.init_db() @@ -35,12 +32,9 @@ def initdb(config): @db.command(u'clean', short_help=u'Clean the database') @click.help_option(u'-h', u'--help') -@click_config_option @click.confirmation_option(prompt=prompt_msg) -def cleandb(config): +def cleandb(): u'''Cleaning the database''' - # conf = load_config(config) - # load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.clean_db() @@ -51,11 +45,8 @@ def cleandb(config): @db.command(u'upgrade', short_help=u'Upgrade the database') @click.help_option(u'-h', u'--help') -@click_config_option -def updatedb(config): +def updatedb(): u'''Upgrading the database''' - # conf = load_config(config) - # load_environment(conf.global_conf, conf.local_conf) try: import ckan.model as model model.repo.upgrade_db() diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py new file mode 100644 index 00000000000..2ed1c4df3d3 --- /dev/null +++ b/ckan/cli/search_index.py @@ -0,0 +1,27 @@ +# encoding: utf-8 + +import os + +import click +from flask import Flask, current_app +from werkzeug.serving import run_simple + +from ckan.cli import click_config_option + + +@click.group(name=u'search-index', short_help=u'Search index commands') +@click.help_option(u'-h', u'--help') +def search_index(): + pass + + +@search_index.command(name=u'rebuild', short_help=u'Rebuild search index') +@click.option(u'-i', u'--force', is_flag=True, + help=u'Ignore exceptions when rebuilding the index') +@click.option(u'-r', u'--refresh') +@click.option(u'-o', u'--only-missing') +def rebuild(force, only_missing): + ''' Rebuild search index ''' + from ckan.lib.search import rebuild, commit + # if force: + # rebuild() diff --git a/ckan/cli/server.py b/ckan/cli/server.py index 2852810fc35..2d6333dec85 100644 --- a/ckan/cli/server.py +++ b/ckan/cli/server.py @@ -6,18 +6,15 @@ from flask import Flask, current_app from werkzeug.serving import run_simple -from ckan.config.middleware import make_app from ckan.cli import load_config, click_config_option - @click.command(u'run', short_help=u'Start development server') @click.help_option(u'-h', u'--help') @click_config_option @click.option(u'-H', u'--host', default=u'localhost', help=u'Set host') @click.option(u'-p', u'--port', default=5000, help=u'Set port') @click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') -def run(config, host, port, reloader): +@click.pass_context +def run(ctx, config, host, port, reloader): u'''Runs development server''' - conf = load_config(config) - app = make_app(conf.global_conf, **conf.local_conf) - run_simple(host, port, app, use_reloader=reloader, use_evalex=True) + run_simple(host, port, ctx.obj.app, use_reloader=reloader, use_evalex=True) From bae69518d6686e8abef327890ce20d22acbaae14 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 10 Sep 2018 10:44:09 +0200 Subject: [PATCH 24/99] Update DB commands --- ckan/cli/db.py | 19 ++++++++++++++++--- ckan/cli/server.py | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 32b42910384..58e9b122c56 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -31,8 +31,8 @@ def initdb(): @db.command(u'clean', short_help=u'Clean the database') -@click.help_option(u'-h', u'--help') @click.confirmation_option(prompt=prompt_msg) +@click.help_option(u'-h', u'--help') def cleandb(): u'''Cleaning the database''' try: @@ -44,12 +44,25 @@ def cleandb(): @db.command(u'upgrade', short_help=u'Upgrade the database') +@click.option(u'-v', u'--version', help='Migration version') @click.help_option(u'-h', u'--help') -def updatedb(): +def updatedb(version=None): u'''Upgrading the database''' try: import ckan.model as model - model.repo.upgrade_db() + model.repo.upgrade_db(version) except Exception as e: print(e) print(u'Upgrading DB: SUCCESS') + + +@db.command(u'version', short_help=u'Returns current version of data schema') +@click.help_option(u'-h', u'--help') +def version(): + u'''Return current version''' + try: + from ckan.model import Session + print(Session.execute('select version from ' + 'migrate_version;').fetchall()) + except Exception as e: + print(e) diff --git a/ckan/cli/server.py b/ckan/cli/server.py index 2d6333dec85..387bc301704 100644 --- a/ckan/cli/server.py +++ b/ckan/cli/server.py @@ -8,6 +8,7 @@ from ckan.cli import load_config, click_config_option + @click.command(u'run', short_help=u'Start development server') @click.help_option(u'-h', u'--help') @click_config_option From 2886f6ddd2e00f86b666ee8878ecea4f8605d36f Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 10 Sep 2018 10:44:24 +0200 Subject: [PATCH 25/99] Update DB commands --- ckan/cli/db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 58e9b122c56..78091dfbc8d 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -62,7 +62,7 @@ def version(): u'''Return current version''' try: from ckan.model import Session - print(Session.execute('select version from ' - 'migrate_version;').fetchall()) + print(Session.execute(u'select version from ' + u'migrate_version;').fetchall()) except Exception as e: print(e) From eea11bc4a67357b6e4615faf1aeb860599ccf152 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Mon, 10 Sep 2018 10:46:15 +0200 Subject: [PATCH 26/99] Update DB commands --- ckan/cli/__init__.py | 4 ++-- ckan/cli/db.py | 2 +- ckan/cli/search_index.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 799258f1d0b..e450bf32e50 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -48,8 +48,8 @@ def load_config(config=None): exit(msg) if not os.path.exists(filename): - msg = 'Config file not found: %s' % filename - msg += '\n(Given by: %s)' % config_source + msg = u'Config file not found: %s' % filename + msg += u'\n(Given by: %s)' % config_source exit(msg) fileConfig(filename) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 78091dfbc8d..7b44b3a4823 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -44,7 +44,7 @@ def cleandb(): @db.command(u'upgrade', short_help=u'Upgrade the database') -@click.option(u'-v', u'--version', help='Migration version') +@click.option(u'-v', u'--version', help=u'Migration version') @click.help_option(u'-h', u'--help') def updatedb(version=None): u'''Upgrading the database''' diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index 2ed1c4df3d3..a4123a33f4c 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -21,7 +21,7 @@ def search_index(): @click.option(u'-r', u'--refresh') @click.option(u'-o', u'--only-missing') def rebuild(force, only_missing): - ''' Rebuild search index ''' + u''' Rebuild search index ''' from ckan.lib.search import rebuild, commit # if force: # rebuild() From 09060854d9ab942d57e2a1ed4b402a5c8c183851 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 11 Sep 2018 09:32:23 +0200 Subject: [PATCH 27/99] Remove script --- ckan/lib/flask_cli.py | 84 ------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 ckan/lib/flask_cli.py diff --git a/ckan/lib/flask_cli.py b/ckan/lib/flask_cli.py deleted file mode 100644 index f87b99dfa75..00000000000 --- a/ckan/lib/flask_cli.py +++ /dev/null @@ -1,84 +0,0 @@ -# encoding: utf-8 - -import os - -import click -from flask import Flask, current_app -from flask.cli import AppGroup, with_appcontext -from werkzeug.serving import run_simple - -from ckan.common import config -from ckan.config.environment import load_environment - -from ckan.config.middleware import make_app -from ckan.lib.cli import (click_config_option, load_config, parse_db_config, - paster_click_group) - -# os.environ['FLASK_RUN_FROM_CLI'] = 'true' - - -def _load_config(config=None): - - from paste.deploy import appconfig - from paste.script.util.logging_config import fileConfig - - if config: - filename = os.path.abspath(config) - config_source = u'-c parameter' - elif os.environ.get(u'CKAN_INI'): - filename = os.environ.get(u'CKAN_INI') - config_source = u'$CKAN_INI' - else: - default_filename = u'development.ini' - filename = os.path.join(os.getcwd(), default_filename) - if not os.path.exists(filename): - # give really clear error message for this common situation - msg = u'ERROR: You need to specify the CKAN config (.ini) '\ - u'file path.'\ - u'\nUse the --config parameter or set environment ' \ - u'variable CKAN_INI or have {}\nin the current directory.' \ - .format(default_filename) - exit(msg) - - if not os.path.exists(filename): - msg = u'Config file not found: %s' % filename - msg += u'\n(Given by: %s)' % config_source - exit(msg) - - fileConfig(filename) - return appconfig(u'config:' + filename) - - -@click.group() -@click.help_option(u'-h', u'--help') -def main(*args, **kwargs): - pass - - -@main.command(u'run', short_help=u'Start development server') -@click.help_option(u'-h', u'--help') -@click_config_option -@click.option(u'-H', u'--host', default=u'localhost', help=u'Set host') -@click.option(u'-p', u'--port', default=5000, help=u'Set port') -@click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') -def run(config, host, port, reloader): - u'''Runs development server''' - conf = _load_config(config) - app = make_app(conf.global_conf, **conf.local_conf) - run_simple(host, port, app, use_reloader=reloader, use_evalex=True) - - -@main.command(u'db', short_help=u'Initialize the database') -@click.help_option(u'-h', u'--help') -@click_config_option -@click.argument(u'init') -def initdb(config, init): - u'''Initialising the database''' - conf = _load_config(config) - load_environment(conf.global_conf, conf.local_conf) - try: - import ckan.model as model - model.repo.init_db() - except Exception as e: - print(e) - print(u'Initialising DB: SUCCESS') From f6379768e82f4de2a51a70f9770f085ac801770a Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Thu, 13 Sep 2018 09:00:53 +0200 Subject: [PATCH 28/99] Search index rebuild, replace prints with click.echo --- ckan/cli/db.py | 22 ++++++++++++---------- ckan/cli/search_index.py | 30 +++++++++++++++++++++++++----- ckan/cli/server.py | 1 + 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 7b44b3a4823..ccf2d55541c 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -23,15 +23,15 @@ def initdb(): import ckan.model as model model.repo.init_db() except Exception as e: - print(e) + click.echo(e, err=True) print(u'Initialising DB: SUCCESS') -prompt_msg = u'This will delete all of your data!\nDo you want to continue?' +PROMPT_MSG = u'This will delete all of your data!\nDo you want to continue?' @db.command(u'clean', short_help=u'Clean the database') -@click.confirmation_option(prompt=prompt_msg) +@click.confirmation_option(prompt=PROMPT_MSG) @click.help_option(u'-h', u'--help') def cleandb(): u'''Cleaning the database''' @@ -39,8 +39,8 @@ def cleandb(): import ckan.model as model model.repo.clean_db() except Exception as e: - print(e) - print(u'Cleaning DB: SUCCESS') + click.echo(e, err=True) + click.secho(u'Cleaning DB: SUCCESS', color="green", bold=True) @db.command(u'upgrade', short_help=u'Upgrade the database') @@ -52,8 +52,8 @@ def updatedb(version=None): import ckan.model as model model.repo.upgrade_db(version) except Exception as e: - print(e) - print(u'Upgrading DB: SUCCESS') + click.echo(e, err=True) + click.secho(u'Upgrading DB: SUCCESS', fg='green', bold=True) @db.command(u'version', short_help=u'Returns current version of data schema') @@ -62,7 +62,9 @@ def version(): u'''Return current version''' try: from ckan.model import Session - print(Session.execute(u'select version from ' - u'migrate_version;').fetchall()) + ver = Session.execute(u'select version from ' + u'migrate_version;').fetchall() + click.secho(u"Latest data schema version: {0}".format(ver[0][0]), + fg="green", bold=True) except Exception as e: - print(e) + click.echo(e, err=True) diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index a4123a33f4c..99f55255009 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -16,12 +16,32 @@ def search_index(): @search_index.command(name=u'rebuild', short_help=u'Rebuild search index') +@click.help_option(u'-h', u'--help') +@click.option(u'-v', u'--verbose', is_flag=True) @click.option(u'-i', u'--force', is_flag=True, help=u'Ignore exceptions when rebuilding the index') -@click.option(u'-r', u'--refresh') -@click.option(u'-o', u'--only-missing') -def rebuild(force, only_missing): +@click.option(u'-r', u'--refresh', help=u'Refresh current index', is_flag=True) +@click.option(u'-o', u'--only-missing', + help=u'Index non indexed datasets only', is_flag=True) +@click.option(u'-q', u'--quiet', help=u'Do not output index rebuild progress', + is_flag=True) +@click.option(u'-e', u'--commit-each', is_flag=True, + help=u'Perform a commit after indexing each dataset. This' + u'ensures that changes are immediately available on the' + u'search, but slows significantly the process. Default' + u'is false.') +@click.pass_context +def rebuild(ctx, verbose, force, refresh, only_missing, quiet, commit_each): u''' Rebuild search index ''' from ckan.lib.search import rebuild, commit - # if force: - # rebuild() + try: + with ctx.obj.app.apps.get('flask_app').app.app.app.test_request_context(): + rebuild(only_missing=only_missing, + force=force, + refresh=refresh, + defer_commit=(not commit_each), + quiet=quiet) + except Exception as e: + click.echo(e, err=True) + if not commit_each: + commit() diff --git a/ckan/cli/server.py b/ckan/cli/server.py index 387bc301704..99ad914ce92 100644 --- a/ckan/cli/server.py +++ b/ckan/cli/server.py @@ -18,4 +18,5 @@ @click.pass_context def run(ctx, config, host, port, reloader): u'''Runs development server''' + # click.secho(u"Starting CKAN", fg='yellow') run_simple(host, port, ctx.obj.app, use_reloader=reloader, use_evalex=True) From 3a0f6a6a0c0f507ce9b82d237791bfe46c1cdc71 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Wed, 19 Sep 2018 13:27:04 +0200 Subject: [PATCH 29/99] Add more search-index commands --- ckan/cli/search_index.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index 99f55255009..cfa26ca9b6a 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -35,13 +35,41 @@ def rebuild(ctx, verbose, force, refresh, only_missing, quiet, commit_each): u''' Rebuild search index ''' from ckan.lib.search import rebuild, commit try: - with ctx.obj.app.apps.get('flask_app').app.app.app.test_request_context(): - rebuild(only_missing=only_missing, - force=force, - refresh=refresh, - defer_commit=(not commit_each), - quiet=quiet) + rebuild(only_missing=only_missing, + force=force, + refresh=refresh, + defer_commit=(not commit_each), + quiet=quiet) except Exception as e: click.echo(e, err=True) if not commit_each: commit() + + +@search_index.command(name=u'check', short_help=u'Check search index') +@click.help_option(u'-h', u'--help') +def check(): + from ckan.lib.search import check + check() + + +@search_index.command(name=u'show', short_help=u'Show index of a dataset') +@click.help_option(u'-h', u'--help') +@click.argument(u'dataset_name') +def show(dataset_name): + from ckan.lib.search import show + + index = show(dataset_name) + click.echo(index) + + +@search_index.command(name=u'clear', short_help=u'Clear the search index') +@click.help_option(u'-h', u'--help') +@click.argument(u'dataset_name', required=False) +def clear(dataset_name): + from ckan.lib.search import clear, clear_all + + if dataset_name: + clear(dataset_name) + else: + clear_all() From 92d3b50299a858da2a94c3a95cf4346d78ab7002 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Wed, 19 Sep 2018 13:27:25 +0200 Subject: [PATCH 30/99] Workaround for translator object --- ckan/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckan/common.py b/ckan/common.py index 6dbdd01fb63..5bf5163e44a 100644 --- a/ckan/common.py +++ b/ckan/common.py @@ -1,3 +1,4 @@ + # encoding: utf-8 # This file contains commonly used parts of external libraries. The idea is @@ -67,10 +68,9 @@ def streaming_response( def ugettext(*args, **kwargs): - if is_flask_request(): - return flask_ugettext(*args, **kwargs) - else: - return pylons_ugettext(*args, **kwargs) + # this is on purpose, as we have a problem to check + # for pylons or flask request + return flask_ugettext(*args, **kwargs) _ = ugettext From 7bd11d3f757d40023897bcdd5d5176106285dd47 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 21 Sep 2018 17:22:12 +0100 Subject: [PATCH 31/99] Total estimation first attempt. Failing test for filters - I cant get it to accurately estimate with a filter. --- CHANGELOG.rst | 13 +++ ckanext/datastore/backend/postgres.py | 116 +++++++++++++++++++------ ckanext/datastore/logic/action.py | 9 ++ ckanext/datastore/logic/schema.py | 1 + ckanext/datastore/set_permissions.sql | 15 ++++ ckanext/datastore/tests/helpers.py | 1 + ckanext/datastore/tests/test_search.py | 82 +++++++++++++++++ 7 files changed, 212 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 34b50544daa..cb055f34edf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,19 @@ Changelog v.2.9.0 TBA ================== +General notes: + + * This version requires re-running the ``datastore set-permissions`` command + (assuming you are using the DataStore). See: :ref:`datastore-set-permissions` + + Otherwise datasets will not be viewable or searchable in DataStore and + the logs will contain this error:: + + ValidationError: ... function count_estimate(unknown) does not exist ... + + CKAN developers should also re-run set-permissions on the test database: + :ref:`datastore-test-set-permissions` + Minor changes: * For navl schemas, the 'default' validator no longer applies the default when diff --git a/ckanext/datastore/backend/postgres.py b/ckanext/datastore/backend/postgres.py index 29dc7a26ca1..3559dc9521b 100644 --- a/ckanext/datastore/backend/postgres.py +++ b/ckanext/datastore/backend/postgres.py @@ -14,6 +14,7 @@ import hashlib import json from cStringIO import StringIO +import sqlparse from six import string_types, text_type @@ -1161,6 +1162,7 @@ def validate(context, data_dict): data_dict_copy.pop('id', None) data_dict_copy.pop('include_total', None) + data_dict_copy.pop('total_estimation_threshold', None) data_dict_copy.pop('records_format', None) for key, values in data_dict_copy.iteritems(): @@ -1216,14 +1218,17 @@ def search_data(context, data_dict): else: sort_clause = '' + core_query = u''' + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset}''' + records_format = data_dict['records_format'] if records_format == u'objects': sql_fmt = u''' SELECT array_to_json(array_agg(j))::text FROM ( - SELECT {distinct} {select} - FROM "{resource}" {ts_query} - {where} {sort} LIMIT {limit} OFFSET {offset} - ) AS j''' + CORE_QUERY + ) AS j'''.replace('CORE_QUERY', core_query) elif records_format == u'lists': select_columns = u" || ',' || ".join( s for s in query_dict['select'] @@ -1238,19 +1243,16 @@ def search_data(context, data_dict): elif records_format == u'csv': sql_fmt = u''' COPY ( - SELECT {distinct} {select} - FROM "{resource}" {ts_query} - {where} {sort} LIMIT {limit} OFFSET {offset} - ) TO STDOUT csv DELIMITER ',' ''' + CORE_QUERY + ) TO STDOUT csv DELIMITER ',' '''.replace('CORE_QUERY', core_query) elif records_format == u'tsv': sql_fmt = u''' COPY ( - SELECT {distinct} {select} - FROM "{resource}" {ts_query} - {where} {sort} LIMIT {limit} OFFSET {offset} - ) TO STDOUT csv DELIMITER '\t' ''' + CORE_QUERY + ) TO STDOUT csv DELIMITER '\t' ''' \ + .replace('CORE_QUERY', core_query) - sql_string = sql_fmt.format( + sql_params = dict( distinct=distinct, select=select_columns, resource=resource_id, @@ -1258,7 +1260,9 @@ def search_data(context, data_dict): where=where_clause, sort=sort_clause, limit=limit, - offset=offset) + offset=offset + ) + sql_string = sql_fmt.format(**sql_params) if records_format == u'csv' or records_format == u'tsv': buf = StringIO() _execute_single_statement_copy_to( @@ -1287,17 +1291,72 @@ def search_data(context, data_dict): _insert_links(data_dict, limit, offset) if data_dict.get('include_total', True): - count_sql_string = u'''SELECT count(*) FROM ( - SELECT {distinct} {select} - FROM "{resource}" {ts_query} {where}) as t;'''.format( - distinct=distinct, - select=select_columns, - resource=resource_id, - ts_query=ts_query, - where=where_clause) - count_result = _execute_single_statement( - context, count_sql_string, where_values) - data_dict['total'] = count_result.fetchall()[0][0] + total_estimation_threshold = \ + data_dict.get('total_estimation_threshold') or 0 + if total_estimation_threshold != 0: + # Estimate the total (result row count) + # See: https://wiki.postgresql.org/wiki/Count_estimate + + # Estimates rely on postgres having run ANALYZE on the table, so + # ensure that has been done. + when_was_last_analyze_sql = sqlalchemy.text(''' + SELECT last_analyze, last_autoanalyze + FROM pg_stat_user_tables + WHERE relname=:resource; + ''') + result = context['connection'].execute( + when_was_last_analyze_sql, **sql_params) + last_analyze, last_autoanalyze = result.fetchall()[0] + if not (last_analyze or last_autoanalyze): + analyze_sql = ''' + ANALYZE "{resource}"; + '''.format(**sql_params) + context['connection'].execute(analyze_sql) + + if not (where_clause or distinct): + # there are no filters, so we can use the table row count from + # pg stats + analyze_count_sql = sqlalchemy.text(''' + SELECT reltuples::BIGINT AS approximate_row_count + FROM pg_class + WHERE relname=:resource; + ''') + count_result = context['connection'].execute(analyze_count_sql, + **sql_params) + else: + # use EXPLAIN to get an estimate of the row count for this + # filtered query + explain_count_sql = ''' + SELECT count_estimate('{query}'); + '''.format(query=core_query.format(**sql_params)) + if len(sqlparse.split(explain_count_sql)) > 2: + raise toolkit.ValidationError({ + 'query': ['Query is not a single statement.'] + }) + # count_estimate() is defined by 'datastore set-permissions' + # - see datastore.rst + count_result = context['connection'].execute(explain_count_sql, + [where_values]) + estimated_total = count_result.fetchall()[0][0] + + if total_estimation_threshold != 0 and \ + estimated_total >= total_estimation_threshold: + data_dict['total'] = estimated_total + data_dict['total_was_estimated'] = True + else: + # this is slow for large results, hence the 'estimate' alternative + count_sql_string = u'''SELECT count(*) FROM ( + SELECT {distinct} {select} + FROM "{resource}" {ts_query} {where}) as t;'''.format( + distinct=distinct, + select=select_columns, + resource=resource_id, + ts_query=ts_query, + where=where_clause) + count_result = _execute_single_statement( + context, count_sql_string, where_values) + data_dict['total'] = count_result.fetchall()[0][0] + data_dict['total_was_estimated'] = False return data_dict @@ -1313,6 +1372,13 @@ def _execute_single_statement_copy_to(context, sql_string, where_values, buf): cursor.close() +def _execute(context, sql_string): + cursor = context['connection'].connection.cursor() + cursor.copy_expert(cursor.mogrify(sql_string, where_values), buf) + cursor.close() + + + def format_results(context, results, data_dict): result_fields = [] for field in results.cursor.description: diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 0c91b0f1057..341279ddf04 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -405,6 +405,13 @@ def datastore_search(context, data_dict): :param include_total: True to return total matching record count (optional, default: true) :type include_total: bool + :param total_estimation_threshold: By default, the "total" returned is a + precise count of the query result, which can be computationally + expensive for large results (e.g. >100,000k rows). By setting this + value to non-zero, you can set a threshold estimated total, above + which it doesn't do the count - it just returns the estimated + total, generated by quick sampling (optional, default: 0) + :type total_estimation_threshold: int :param records_format: the format for the records return value: 'objects' (default) list of {fieldname1: value1, ...} dicts, 'lists' list of [value1, value2, ...] lists, @@ -438,6 +445,8 @@ def datastore_search(context, data_dict): :type filters: list of dictionaries :param total: number of total matching records :type total: int + :param total_was_estimated: whether or not the total was estimated + :type total: bool :param records: list of matching results :type records: depends on records_format value passed diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py index c9b221d9f42..510c112f8aa 100644 --- a/ckanext/datastore/logic/schema.py +++ b/ckanext/datastore/logic/schema.py @@ -167,6 +167,7 @@ def datastore_search_schema(): 'sort': [ignore_missing, list_of_strings_or_string], 'distinct': [ignore_missing, boolean_validator], 'include_total': [default(True), boolean_validator], + 'total_estimation_threshold': [default(0), int_validator], 'records_format': [ default(u'objects'), OneOf([u'objects', u'lists', u'csv', u'tsv'])], diff --git a/ckanext/datastore/set_permissions.sql b/ckanext/datastore/set_permissions.sql index e1a8747c95a..8169f6ca1d7 100644 --- a/ckanext/datastore/set_permissions.sql +++ b/ckanext/datastore/set_permissions.sql @@ -106,3 +106,18 @@ DO $body$ END; $body$; +-- estimates the number of rows returned by a query +CREATE OR REPLACE FUNCTION count_estimate(query text) RETURNS INTEGER +AS $body$ + DECLARE + rec record; + ROWS INTEGER; + BEGIN + FOR rec IN EXECUTE 'EXPLAIN ' || query LOOP + ROWS := SUBSTRING(rec."QUERY PLAN" FROM ' rows=([[:digit:]]+)'); + EXIT WHEN ROWS IS NOT NULL; + END LOOP; + + RETURN ROWS; + END +$body$ LANGUAGE plpgsql; diff --git a/ckanext/datastore/tests/helpers.py b/ckanext/datastore/tests/helpers.py index c94aacd3a65..4aeed00c3ba 100644 --- a/ckanext/datastore/tests/helpers.py +++ b/ckanext/datastore/tests/helpers.py @@ -28,6 +28,7 @@ def clear_db(Session): FROM pg_proc INNER JOIN pg_namespace ns ON (pg_proc.pronamespace = ns.oid) WHERE ns.nspname = 'public' AND proname != 'populate_full_text_trigger' + AND proname != 'count_estimate' ''' drop_functions = u''.join(r[0] for r in c.execute(drop_functions_sql)) if drop_functions: diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py index 994e8309c95..5dc959d21c2 100644 --- a/ckanext/datastore/tests/test_search.py +++ b/ckanext/datastore/tests/test_search.py @@ -100,6 +100,25 @@ def test_all_params_work_with_fields_with_whitespaces(self): result_years = [r['the year'] for r in result['records']] assert_equals(result_years, [2013]) + def test_search_total(self): + resource = factories.Resource() + data = { + 'resource_id': resource['id'], + 'force': True, + 'records': [ + {'the year': 2014}, + {'the year': 2013}, + ], + } + result = helpers.call_action('datastore_create', **data) + search_data = { + 'resource_id': resource['id'], + 'include_total': True, + } + result = helpers.call_action('datastore_search', **search_data) + assert_equals(result['total'], 2) + assert_equals(result.get('total_was_estimated'), False) + def test_search_without_total(self): resource = factories.Resource() data = { @@ -117,6 +136,69 @@ def test_search_without_total(self): } result = helpers.call_action('datastore_search', **search_data) assert 'total' not in result + assert 'total_was_estimated' not in result + + def test_estimate_total(self): + resource = factories.Resource() + data = { + 'resource_id': resource['id'], + 'force': True, + 'records': [{'the year': 1900 + i} for i in range(100)], + } + result = helpers.call_action('datastore_create', **data) + analyze_sql = ''' + ANALYZE "{resource}"; + '''.format(resource=resource['id']) + db.get_write_engine().execute(analyze_sql) + search_data = { + 'resource_id': resource['id'], + 'total_estimation_threshold': 50, + } + result = helpers.call_action('datastore_search', **search_data) + assert_equals(result.get('total_was_estimated'), True) + assert 95 < result['total'] < 105, result['total'] + + def test_estimate_total_with_filters(self): + resource = factories.Resource() + data = { + 'resource_id': resource['id'], + 'force': True, + 'records': [{'the year': 1900 + i} for i in range(3)] * 10, + } + result = helpers.call_action('datastore_create', **data) + analyze_sql = ''' + ANALYZE "{resource}"; + '''.format(resource=resource['id']) + db.get_write_engine().execute(analyze_sql) + search_data = { + 'resource_id': resource['id'], + 'filters': {u'the year': 1901}, + 'total_estimation_threshold': 5, + } + # if this fails with: "ValidationError: ... function + # count_estimate(unknown) does not exist ..." + # then you need to rerun set-permissions for the test db- see test.rst + result = helpers.call_action('datastore_search', **search_data) + assert 8 <= result['total'] <= 12, result['total'] + assert_equals(result.get('total_was_estimated'), True) + + def test_estimate_total_where_analyze_is_not_already_done(self): + # ANALYSE is done by latest datapusher/xloader, but need to cope in + # if tables created in other ways which may not have had an ANALYSE + resource = factories.Resource() + data = { + 'resource_id': resource['id'], + 'force': True, + 'records': [{'the year': 1900 + i} for i in range(100)], + } + result = helpers.call_action('datastore_create', **data) + search_data = { + 'resource_id': resource['id'], + 'total_estimation_threshold': 50, + } + result = helpers.call_action('datastore_search', **search_data) + assert_equals(result.get('total_was_estimated'), True) + assert 95 < result['total'] < 105, result['total'] class TestDatastoreSearch(DatastoreLegacyTestBase): From 4cad664982f7352c077fd31165fa185938a9dad1 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 21 Sep 2018 17:43:28 +0100 Subject: [PATCH 32/99] Remove estimation for filters and distinct because they didnt work. --- CHANGELOG.rst | 13 ---- ckanext/datastore/backend/postgres.py | 86 ++++++++++---------------- ckanext/datastore/logic/action.py | 3 +- ckanext/datastore/set_permissions.sql | 15 ----- ckanext/datastore/tests/helpers.py | 1 - ckanext/datastore/tests/test_search.py | 31 ++++++++-- 6 files changed, 60 insertions(+), 89 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cb055f34edf..34b50544daa 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,19 +10,6 @@ Changelog v.2.9.0 TBA ================== -General notes: - - * This version requires re-running the ``datastore set-permissions`` command - (assuming you are using the DataStore). See: :ref:`datastore-set-permissions` - - Otherwise datasets will not be viewable or searchable in DataStore and - the logs will contain this error:: - - ValidationError: ... function count_estimate(unknown) does not exist ... - - CKAN developers should also re-run set-permissions on the test database: - :ref:`datastore-test-set-permissions` - Minor changes: * For navl schemas, the 'default' validator no longer applies the default when diff --git a/ckanext/datastore/backend/postgres.py b/ckanext/datastore/backend/postgres.py index 3559dc9521b..39582f8a68e 100644 --- a/ckanext/datastore/backend/postgres.py +++ b/ckanext/datastore/backend/postgres.py @@ -14,7 +14,6 @@ import hashlib import json from cStringIO import StringIO -import sqlparse from six import string_types, text_type @@ -1218,17 +1217,14 @@ def search_data(context, data_dict): else: sort_clause = '' - core_query = u''' - SELECT {distinct} {select} - FROM "{resource}" {ts_query} - {where} {sort} LIMIT {limit} OFFSET {offset}''' - records_format = data_dict['records_format'] if records_format == u'objects': sql_fmt = u''' SELECT array_to_json(array_agg(j))::text FROM ( - CORE_QUERY - ) AS j'''.replace('CORE_QUERY', core_query) + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset} + ) AS j''' elif records_format == u'lists': select_columns = u" || ',' || ".join( s for s in query_dict['select'] @@ -1243,16 +1239,19 @@ def search_data(context, data_dict): elif records_format == u'csv': sql_fmt = u''' COPY ( - CORE_QUERY - ) TO STDOUT csv DELIMITER ',' '''.replace('CORE_QUERY', core_query) + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset} + ) TO STDOUT csv DELIMITER ',' ''' elif records_format == u'tsv': sql_fmt = u''' COPY ( - CORE_QUERY - ) TO STDOUT csv DELIMITER '\t' ''' \ - .replace('CORE_QUERY', core_query) + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset} + ) TO STDOUT csv DELIMITER '\t' ''' - sql_params = dict( + sql_string = sql_fmt.format( distinct=distinct, select=select_columns, resource=resource_id, @@ -1260,9 +1259,7 @@ def search_data(context, data_dict): where=where_clause, sort=sort_clause, limit=limit, - offset=offset - ) - sql_string = sql_fmt.format(**sql_params) + offset=offset) if records_format == u'csv' or records_format == u'tsv': buf = StringIO() _execute_single_statement_copy_to( @@ -1293,7 +1290,8 @@ def search_data(context, data_dict): if data_dict.get('include_total', True): total_estimation_threshold = \ data_dict.get('total_estimation_threshold') or 0 - if total_estimation_threshold != 0: + estimated_total = None + if total_estimation_threshold != 0 and not (where_clause or distinct): # Estimate the total (result row count) # See: https://wiki.postgresql.org/wiki/Count_estimate @@ -1305,46 +1303,33 @@ def search_data(context, data_dict): WHERE relname=:resource; ''') result = context['connection'].execute( - when_was_last_analyze_sql, **sql_params) + when_was_last_analyze_sql, resource=resource_id) last_analyze, last_autoanalyze = result.fetchall()[0] if not (last_analyze or last_autoanalyze): analyze_sql = ''' ANALYZE "{resource}"; - '''.format(**sql_params) + '''.format(resource=resource_id) context['connection'].execute(analyze_sql) - if not (where_clause or distinct): - # there are no filters, so we can use the table row count from - # pg stats - analyze_count_sql = sqlalchemy.text(''' - SELECT reltuples::BIGINT AS approximate_row_count - FROM pg_class - WHERE relname=:resource; - ''') - count_result = context['connection'].execute(analyze_count_sql, - **sql_params) - else: - # use EXPLAIN to get an estimate of the row count for this - # filtered query - explain_count_sql = ''' - SELECT count_estimate('{query}'); - '''.format(query=core_query.format(**sql_params)) - if len(sqlparse.split(explain_count_sql)) > 2: - raise toolkit.ValidationError({ - 'query': ['Query is not a single statement.'] - }) - # count_estimate() is defined by 'datastore set-permissions' - # - see datastore.rst - count_result = context['connection'].execute(explain_count_sql, - [where_values]) + # there are no filters, so we can use the table row count from + # pg stats + # (We also tried using the EXPLAIN to estimate filtered queries but + # it didn't estimate well in tests) + analyze_count_sql = sqlalchemy.text(''' + SELECT reltuples::BIGINT AS approximate_row_count + FROM pg_class + WHERE relname=:resource; + ''') + count_result = context['connection'].execute(analyze_count_sql, + resource=resource_id) estimated_total = count_result.fetchall()[0][0] - if total_estimation_threshold != 0 and \ - estimated_total >= total_estimation_threshold: + if estimated_total is not None \ + and estimated_total >= total_estimation_threshold: data_dict['total'] = estimated_total data_dict['total_was_estimated'] = True else: - # this is slow for large results, hence the 'estimate' alternative + # this is slow for large results count_sql_string = u'''SELECT count(*) FROM ( SELECT {distinct} {select} FROM "{resource}" {ts_query} {where}) as t;'''.format( @@ -1372,13 +1357,6 @@ def _execute_single_statement_copy_to(context, sql_string, where_values, buf): cursor.close() -def _execute(context, sql_string): - cursor = context['connection'].connection.cursor() - cursor.copy_expert(cursor.mogrify(sql_string, where_values), buf) - cursor.close() - - - def format_results(context, results, data_dict): result_fields = [] for field in results.cursor.description: diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 341279ddf04..cb2978a2552 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -410,7 +410,8 @@ def datastore_search(context, data_dict): expensive for large results (e.g. >100,000k rows). By setting this value to non-zero, you can set a threshold estimated total, above which it doesn't do the count - it just returns the estimated - total, generated by quick sampling (optional, default: 0) + total, generated by quick sampling. NB Total estimation is not + compatible with filter or distinct options. (optional, default: 0) :type total_estimation_threshold: int :param records_format: the format for the records return value: 'objects' (default) list of {fieldname1: value1, ...} dicts, diff --git a/ckanext/datastore/set_permissions.sql b/ckanext/datastore/set_permissions.sql index 8169f6ca1d7..e1a8747c95a 100644 --- a/ckanext/datastore/set_permissions.sql +++ b/ckanext/datastore/set_permissions.sql @@ -106,18 +106,3 @@ DO $body$ END; $body$; --- estimates the number of rows returned by a query -CREATE OR REPLACE FUNCTION count_estimate(query text) RETURNS INTEGER -AS $body$ - DECLARE - rec record; - ROWS INTEGER; - BEGIN - FOR rec IN EXECUTE 'EXPLAIN ' || query LOOP - ROWS := SUBSTRING(rec."QUERY PLAN" FROM ' rows=([[:digit:]]+)'); - EXIT WHEN ROWS IS NOT NULL; - END LOOP; - - RETURN ROWS; - END -$body$ LANGUAGE plpgsql; diff --git a/ckanext/datastore/tests/helpers.py b/ckanext/datastore/tests/helpers.py index 4aeed00c3ba..c94aacd3a65 100644 --- a/ckanext/datastore/tests/helpers.py +++ b/ckanext/datastore/tests/helpers.py @@ -28,7 +28,6 @@ def clear_db(Session): FROM pg_proc INNER JOIN pg_namespace ns ON (pg_proc.pronamespace = ns.oid) WHERE ns.nspname = 'public' AND proname != 'populate_full_text_trigger' - AND proname != 'count_estimate' ''' drop_functions = u''.join(r[0] for r in c.execute(drop_functions_sql)) if drop_functions: diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py index 5dc959d21c2..2a30fccc271 100644 --- a/ckanext/datastore/tests/test_search.py +++ b/ckanext/datastore/tests/test_search.py @@ -175,12 +175,33 @@ def test_estimate_total_with_filters(self): 'filters': {u'the year': 1901}, 'total_estimation_threshold': 5, } - # if this fails with: "ValidationError: ... function - # count_estimate(unknown) does not exist ..." - # then you need to rerun set-permissions for the test db- see test.rst result = helpers.call_action('datastore_search', **search_data) - assert 8 <= result['total'] <= 12, result['total'] - assert_equals(result.get('total_was_estimated'), True) + assert_equals(result['total'], 10) + # estimation is not compatible with filters + assert_equals(result.get('total_was_estimated'), False) + + def test_estimate_total_with_distinct(self): + resource = factories.Resource() + data = { + 'resource_id': resource['id'], + 'force': True, + 'records': [{'the year': 1900 + i} for i in range(3)] * 10, + } + result = helpers.call_action('datastore_create', **data) + analyze_sql = ''' + ANALYZE "{resource}"; + '''.format(resource=resource['id']) + db.get_write_engine().execute(analyze_sql) + search_data = { + 'resource_id': resource['id'], + 'fields': ['the year'], + 'distinct': True, + 'total_estimation_threshold': 1, + } + result = helpers.call_action('datastore_search', **search_data) + assert_equals(result['total'], 3) + # estimation is not compatible with distinct + assert_equals(result.get('total_was_estimated'), False) def test_estimate_total_where_analyze_is_not_already_done(self): # ANALYSE is done by latest datapusher/xloader, but need to cope in From ee0348798dfe4eb788dcb0153120c61dc552f9f9 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 25 Sep 2018 11:29:27 +0200 Subject: [PATCH 33/99] Adding rebuild fast, revert the workaround for the translator object --- ckan/cli/search_index.py | 53 ++++++++++++++++++++++++++++++++++++---- ckan/common.py | 7 +++--- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index cfa26ca9b6a..12f1ea64307 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -3,6 +3,7 @@ import os import click +import sqlalchemy as sa from flask import Flask, current_app from werkzeug.serving import run_simple @@ -33,13 +34,15 @@ def search_index(): @click.pass_context def rebuild(ctx, verbose, force, refresh, only_missing, quiet, commit_each): u''' Rebuild search index ''' + flask_app = ctx.obj.app.apps['flask_app']._wsgi_app from ckan.lib.search import rebuild, commit try: - rebuild(only_missing=only_missing, - force=force, - refresh=refresh, - defer_commit=(not commit_each), - quiet=quiet) + with flask_app.test_request_context(): + rebuild(only_missing=only_missing, + force=force, + refresh=refresh, + defer_commit=(not commit_each), + quiet=quiet) except Exception as e: click.echo(e, err=True) if not commit_each: @@ -73,3 +76,43 @@ def clear(dataset_name): clear(dataset_name) else: clear_all() + + +@search_index.command(name=u'rebuild-fast', + short_help=u'Reindex with multiprocessing') +@click.help_option(u'-h', u'--help') +@click.pass_context +def rebuild_fast(ctx): + conf = ctx.obj.config + db_url = conf['sqlalchemy.url'] + engine = sa.create_engine(db_url) + package_ids = [] + result = engine.execute(u"select id from package where state = 'active';") + for row in result: + package_ids.append(row[0]) + + def start(ids): + ## load actual enviroment for each subprocess, so each have thier own + ## sa session + self._load_config() + from ckan.lib.search import rebuild, commit + rebuild(package_ids=ids) + commit() + + def chunks(l, n): + u""" Yield n successive chunks from l. + u""" + newn = int(len(l) / n) + for i in xrange(0, n-1): + yield l[i*newn:i*newn+newn] + yield l[n*newn-newn:] + + processes = [] + for chunk in chunks(package_ids, mp.cpu_count()): + process = mp.Process(target=start, args=(chunk,)) + processes.append(process) + process.daemon = True + process.start() + + for process in processes: + process.join() diff --git a/ckan/common.py b/ckan/common.py index 5bf5163e44a..4dd1a0854e1 100644 --- a/ckan/common.py +++ b/ckan/common.py @@ -68,9 +68,10 @@ def streaming_response( def ugettext(*args, **kwargs): - # this is on purpose, as we have a problem to check - # for pylons or flask request - return flask_ugettext(*args, **kwargs) + if is_flask_request(): + return flask_ugettext(*args, **kwargs) + else: + return pylons_ugettext(*args, **kwargs) _ = ugettext From eaf03e616571063a9ac398b6d81429adb2ae1b2f Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 25 Sep 2018 11:39:09 +0200 Subject: [PATCH 34/99] Set test request context for rebuild fast --- ckan/cli/search_index.py | 23 ++++++++++++++--------- ckan/lib/cli.py | 3 --- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index 12f1ea64307..4f78728386b 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -1,5 +1,6 @@ # encoding: utf-8 +import multiprocessing as mp import os import click @@ -84,6 +85,7 @@ def clear(dataset_name): @click.pass_context def rebuild_fast(ctx): conf = ctx.obj.config + flask_app = ctx.obj.app.apps['flask_app']._wsgi_app db_url = conf['sqlalchemy.url'] engine = sa.create_engine(db_url) package_ids = [] @@ -94,7 +96,6 @@ def rebuild_fast(ctx): def start(ids): ## load actual enviroment for each subprocess, so each have thier own ## sa session - self._load_config() from ckan.lib.search import rebuild, commit rebuild(package_ids=ids) commit() @@ -108,11 +109,15 @@ def chunks(l, n): yield l[n*newn-newn:] processes = [] - for chunk in chunks(package_ids, mp.cpu_count()): - process = mp.Process(target=start, args=(chunk,)) - processes.append(process) - process.daemon = True - process.start() - - for process in processes: - process.join() + with flask_app.test_request_context(): + try: + for chunk in chunks(package_ids, mp.cpu_count()): + process = mp.Process(target=start, args=(chunk,)) + processes.append(process) + process.daemon = True + process.start() + + for process in processes: + process.join() + except Exception as e: + click.echo(e.message) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index aecc40c2f2e..77ef892a565 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -556,9 +556,6 @@ def rebuild_fast(self): package_ids.append(row[0]) def start(ids): - ## load actual enviroment for each subprocess, so each have thier own - ## sa session - self._load_config() from ckan.lib.search import rebuild, commit rebuild(package_ids=ids) commit() From d8dcdc50b1efdcb3d793130a533e0d96505e3706 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Tue, 25 Sep 2018 14:06:17 +0200 Subject: [PATCH 35/99] pep8 --- ckan/cli/db.py | 6 +++--- ckan/cli/search_index.py | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/ckan/cli/db.py b/ckan/cli/db.py index ccf2d55541c..9559e9ee766 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -40,7 +40,7 @@ def cleandb(): model.repo.clean_db() except Exception as e: click.echo(e, err=True) - click.secho(u'Cleaning DB: SUCCESS', color="green", bold=True) + click.secho(u'Cleaning DB: SUCCESS', color=u"green", bold=True) @db.command(u'upgrade', short_help=u'Upgrade the database') @@ -53,7 +53,7 @@ def updatedb(version=None): model.repo.upgrade_db(version) except Exception as e: click.echo(e, err=True) - click.secho(u'Upgrading DB: SUCCESS', fg='green', bold=True) + click.secho(u'Upgrading DB: SUCCESS', fg=u'green', bold=True) @db.command(u'version', short_help=u'Returns current version of data schema') @@ -65,6 +65,6 @@ def version(): ver = Session.execute(u'select version from ' u'migrate_version;').fetchall() click.secho(u"Latest data schema version: {0}".format(ver[0][0]), - fg="green", bold=True) + fg=u"green", bold=True) except Exception as e: click.echo(e, err=True) diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index 4f78728386b..5d605f0bd2e 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -94,15 +94,12 @@ def rebuild_fast(ctx): package_ids.append(row[0]) def start(ids): - ## load actual enviroment for each subprocess, so each have thier own - ## sa session from ckan.lib.search import rebuild, commit rebuild(package_ids=ids) commit() def chunks(l, n): - u""" Yield n successive chunks from l. - u""" + u""" Yield n successive chunks from l.""" newn = int(len(l) / n) for i in xrange(0, n-1): yield l[i*newn:i*newn+newn] From 66b54bfdca98f3ed18f723ef4b74f305eed7e0f4 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Thu, 27 Sep 2018 14:56:18 +0200 Subject: [PATCH 36/99] Enabling logging with the CLI --- ckan/cli/cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index 774e6c2ac4a..3eabc8de3dc 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -7,6 +7,11 @@ from ckan.cli import db, load_config, click_config_option, search_index, server from ckan.config.middleware import make_app +import logging + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') + class CkanCommand(object): @@ -20,6 +25,7 @@ def __init__(self, config=None): @click_config_option @click.pass_context def ckan(ctx, config, *args, **kwargs): + log.info(u'Loading configuration') ctx.obj = CkanCommand(config) From 680448bab268d63c348191d0e1ec64b33b706405 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 28 Sep 2018 11:04:21 +0200 Subject: [PATCH 37/99] testin messages --- ckan/cli/__init__.py | 4 ++-- ckan/cli/cli.py | 9 ++++++++- ckan/cli/db.py | 6 ++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index e450bf32e50..9218458d093 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -14,6 +14,7 @@ log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') click_config_option = click.option( @@ -28,7 +29,7 @@ def load_config(config=None): from paste.deploy import appconfig from paste.script.util.logging_config import fileConfig - + log.info("Searching for configuration file") if config: filename = os.path.abspath(config) config_source = u'-c parameter' @@ -53,5 +54,4 @@ def load_config(config=None): exit(msg) fileConfig(filename) - log.debug(u"Using " + filename) return appconfig(u'config:' + filename) diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index 3eabc8de3dc..ef5dff8f550 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -10,7 +10,14 @@ import logging log = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') + + +# class CliLoggingHandler(logging.Handler): + +# def __init__(self): +# super(CliLoggingHandler, self).__init__() + +# def emit(self, record): class CkanCommand(object): diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 9559e9ee766..70793555a70 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -7,6 +7,10 @@ from werkzeug.serving import run_simple from ckan.cli import click_config_option +import logging + + +log = logging.getLogger(__name__) @click.group(name=u'db', short_help=u'Database commands') @@ -19,6 +23,7 @@ def db(): @click.help_option(u'-h', u'--help') def initdb(): u'''Initialising the database''' + log.info("Initialize the Database") try: import ckan.model as model model.repo.init_db() @@ -60,6 +65,7 @@ def updatedb(version=None): @click.help_option(u'-h', u'--help') def version(): u'''Return current version''' + log.info("Returning current DB version") try: from ckan.model import Session ver = Session.execute(u'select version from ' From 034b90ab46bd5a13fd857dc885f1f95c446a8289 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 12:42:38 +0100 Subject: [PATCH 38/99] Package search rows limit is now configurable --- ckan/lib/search/query.py | 4 +++- ckan/logic/action/get.py | 4 ++-- ckan/tests/logic/action/test_get.py | 19 +++++++++++++++++++ doc/maintaining/configuration.rst | 12 ++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 599125bbe54..29740728ad1 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -312,7 +312,9 @@ def run(self, query, permission_labels=None, **kwargs): query['q'] = "*:*" # number of results - rows_to_return = min(1000, int(query.get('rows', 10))) + rows_to_return = min( + int(config.get('ckan.search.rows_max', 1000)), + int(query.get('rows', 10))) if rows_to_return > 0: # #1683 Work around problem of last result being out of order # in SOLR 1.4 diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 095b862638a..fadbd2311e0 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1694,8 +1694,8 @@ def package_search(context, data_dict): documentation, this is a comma-separated string of field names and sort-orderings. :type sort: string - :param rows: the number of matching rows to return. There is a hard limit - of 1000 datasets per query. + :param rows: the number of matching rows (datasets) to return. Default is + 10. Upper limit is 1000, depending on site configuration. :type rows: int :param start: the offset in the complete result for where the set of returned datasets should begin. diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 70a85384a8e..69838752ecf 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -892,6 +892,25 @@ def test_bad_solr_parameter(self): # SOLR error is 'Missing sort order' or 'Missing_sort_order', # depending on the solr version. + def _create_bulk_datasets(self, name, count): + from ckan import model + model.repo.new_revision() + pkgs = [model.Package(name='{}_{}'.format(name, i)) + for i in range(count)] + model.Session.add_all(pkgs) + model.repo.commit_and_remove() + + def test_rows_returned_default(self): + self._create_bulk_datasets('rows_default', 11) + results = logic.get_action('package_search')({}, {}) + eq(len(results['results']), 10) # i.e. 'rows' default value + + @helpers.change_config('ckan.search.rows_max', '12') + def test_rows_returned_limited(self): + self._create_bulk_datasets('rows_limited', 14) + results = logic.get_action('package_search')({}, {'rows': '15'}) + eq(len(results['results']), 12) # i.e. ckan.search.rows_max + def test_facets(self): org = factories.Organization(name='test-org-facet', title='Test Org') factories.Dataset(owner_org=org['id']) diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index e1348ebcead..0bfcb425bae 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -734,6 +734,18 @@ Default value: ``None`` List of the extra resource fields that would be used when searching. +.. _ckan.search.rows_max: + +ckan.search.rows_max +^^^^^^^^^^^^^^^^^^^^ + +Example:: + + ckan.search.rows_max = 1000 + +Default value: ``1000`` + +Maximum allowed value for ``package_search``'s ``rows`` parameter. Redis Settings --------------- From 2ce2a22f9887359ae512952be53bf3954e26d5fe Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 28 Sep 2018 14:15:12 +0200 Subject: [PATCH 39/99] Testing with wekzeug logger --- ckan/cli/__init__.py | 2 +- ckan/cli/db.py | 2 +- ckan/config/middleware/flask_app.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 9218458d093..1b7029d3bee 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -13,7 +13,7 @@ from ckan.config.middleware import make_app -log = logging.getLogger(__name__) +log = logging.getLogger('werkzeug') logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') diff --git a/ckan/cli/db.py b/ckan/cli/db.py index 70793555a70..e8ddbe00225 100644 --- a/ckan/cli/db.py +++ b/ckan/cli/db.py @@ -65,7 +65,7 @@ def updatedb(version=None): @click.help_option(u'-h', u'--help') def version(): u'''Return current version''' - log.info("Returning current DB version") + log.info("Returning current DB") try: from ckan.model import Session ver = Session.execute(u'select version from ' diff --git a/ckan/config/middleware/flask_app.py b/ckan/config/middleware/flask_app.py index 3c56278b78a..e10ba332364 100644 --- a/ckan/config/middleware/flask_app.py +++ b/ckan/config/middleware/flask_app.py @@ -111,6 +111,7 @@ def make_flask_stack(conf, **app_conf): app = DebuggedApplication(app, True) app = app.app + # Use Beaker as the Flask session interface class BeakerSessionInterface(SessionInterface): def open_session(self, app, request): @@ -269,6 +270,8 @@ def hello_world_post(): # Add a reference to the actual Flask app so it's easier to access app._wsgi_app = flask_app + app.logger.propagate = True + # app.logger.propagate = True return app From 1b85c23b5bc707a8ab50ec55bce23f856502467e Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Fri, 28 Sep 2018 14:56:50 +0200 Subject: [PATCH 40/99] Revert some changes --- ckan/cli/__init__.py | 5 +++-- ckan/config/middleware/flask_app.py | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 1b7029d3bee..009b4ecd03c 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -13,8 +13,9 @@ from ckan.config.middleware import make_app -log = logging.getLogger('werkzeug') -logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(levelname)s [%(name)s] %(message)s') click_config_option = click.option( diff --git a/ckan/config/middleware/flask_app.py b/ckan/config/middleware/flask_app.py index e10ba332364..3c56278b78a 100644 --- a/ckan/config/middleware/flask_app.py +++ b/ckan/config/middleware/flask_app.py @@ -111,7 +111,6 @@ def make_flask_stack(conf, **app_conf): app = DebuggedApplication(app, True) app = app.app - # Use Beaker as the Flask session interface class BeakerSessionInterface(SessionInterface): def open_session(self, app, request): @@ -270,8 +269,6 @@ def hello_world_post(): # Add a reference to the actual Flask app so it's easier to access app._wsgi_app = flask_app - app.logger.propagate = True - # app.logger.propagate = True return app From de12494c0ae76789b2bc6c1a54ec426b1c5b728c Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 14:20:09 +0100 Subject: [PATCH 41/99] Add limits to package/user/group/organization_activity_list (_html) --- ckan/logic/action/get.py | 91 +++++++++++----- ckan/tests/logic/action/test_get.py | 158 ++++++++++++++++++++++++++++ doc/maintaining/configuration.rst | 16 ++- test-core.ini | 2 - 4 files changed, 235 insertions(+), 32 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index fadbd2311e0..e554617d476 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1694,8 +1694,9 @@ def package_search(context, data_dict): documentation, this is a comma-separated string of field names and sort-orderings. :type sort: string - :param rows: the number of matching rows (datasets) to return. Default is - 10. Upper limit is 1000, depending on site configuration. + :param rows: the maximum number of matching rows (datasets) to return. + (optional, default: ``10``, upper limit: ``1000`` unless set in + site's configuration ``ckan.search.rows_max``) :type rows: int :param start: the offset in the complete result for where the set of returned datasets should begin. @@ -2461,8 +2462,9 @@ def user_activity_list(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2480,8 +2482,10 @@ def user_activity_list(context, data_dict): raise logic.NotFound offset = data_dict.get('offset', 0) - limit = int( - data_dict.get('limit', config.get('ckan.activity_list_limit', 31))) + limit = min( + int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), + int(config.get('ckan.activity_list_limit_max', 100)) + ) _activity_objects = model.activity.user_activity_list(user.id, limit=limit, offset=offset) @@ -2503,8 +2507,9 @@ def package_activity_list(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2522,8 +2527,10 @@ def package_activity_list(context, data_dict): raise logic.NotFound offset = int(data_dict.get('offset', 0)) - limit = int( - data_dict.get('limit', config.get('ckan.activity_list_limit', 31))) + limit = min( + int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), + int(config.get('ckan.activity_list_limit_max', 100)) + ) _activity_objects = model.activity.package_activity_list(package.id, limit=limit, offset=offset) @@ -2545,8 +2552,9 @@ def group_activity_list(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2559,8 +2567,10 @@ def group_activity_list(context, data_dict): model = context['model'] group_id = data_dict.get('id') offset = data_dict.get('offset', 0) - limit = int( - data_dict.get('limit', config.get('ckan.activity_list_limit', 31))) + limit = min( + int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), + int(config.get('ckan.activity_list_limit_max', 100)) + ) # Convert group_id (could be id or name) into id. group_show = logic.get_action('group_show') @@ -2580,6 +2590,14 @@ def organization_activity_list(context, data_dict): :param id: the id or name of the organization :type id: string + :param offset: where to start getting activity items from + (optional, default: ``0``) + :type offset: int + :param limit: the maximum number of activities to return + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) + :type limit: int :rtype: list of dictionaries @@ -2591,8 +2609,10 @@ def organization_activity_list(context, data_dict): model = context['model'] org_id = data_dict.get('id') offset = data_dict.get('offset', 0) - limit = int( - data_dict.get('limit', config.get('ckan.activity_list_limit', 31))) + limit = min( + int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), + int(config.get('ckan.activity_list_limit_max', 100)) + ) # Convert org_id (could be id or name) into id. org_show = logic.get_action('organization_show') @@ -2614,8 +2634,9 @@ def recently_changed_packages_activity_list(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2625,8 +2646,10 @@ def recently_changed_packages_activity_list(context, data_dict): # authorized to read. model = context['model'] offset = data_dict.get('offset', 0) - limit = int( - data_dict.get('limit', config.get('ckan.activity_list_limit', 31))) + limit = min( + int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), + int(config.get('ckan.activity_list_limit_max', 100)) + ) _activity_objects = model.activity.recently_changed_packages_activity_list( limit=limit, offset=offset) @@ -2665,8 +2688,9 @@ def user_activity_list_html(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2696,8 +2720,9 @@ def package_activity_list_html(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2727,8 +2752,9 @@ def group_activity_list_html(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2754,6 +2780,14 @@ def organization_activity_list_html(context, data_dict): :param id: the id or name of the organization :type id: string + :param offset: where to start getting activity items from + (optional, default: ``0``) + :type offset: int + :param limit: the maximum number of activities to return + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) + :type limit: int :rtype: string @@ -2782,8 +2816,9 @@ def recently_changed_packages_activity_list_html(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - ckan.activity_list_limit setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 69838752ecf..a6f6439af77 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -2247,3 +2247,161 @@ def test_not_existing_job(self): Test showing a not existing job. ''' helpers.call_action(u'job_show', id=u'does-not-exist') + + +class TestPackageActivityList(helpers.FunctionalTestBase): + def _create_bulk_package_activities(self, count): + dataset = factories.Dataset() + from ckan import model + objs = [model.Activity( + user_id=None, object_id=dataset['id'], revision_id=None, + activity_type=None, data=None) + for i in range(count)] + model.Session.add_all(objs) + model.repo.commit_and_remove() + return dataset['id'] + + def test_limit_default(self): + id = self._create_bulk_package_activities(35) + results = logic.get_action('package_activity_list')({}, {'id': id}) + eq(len(results), 31) # i.e. default value + + @helpers.change_config('ckan.activity_list_limit', '5') + def test_limit_configured(self): + id = self._create_bulk_package_activities(7) + results = logic.get_action('package_activity_list')({}, {'id': id}) + eq(len(results), 5) # i.e. ckan.activity_list_limit + + @helpers.change_config('ckan.activity_list_limit', '5') + @helpers.change_config('ckan.activity_list_limit_max', '7') + def test_limit_hits_max(self): + id = self._create_bulk_package_activities(9) + results = logic.get_action('package_activity_list')( + {}, {'id': id, 'limit': '9'}) + eq(len(results), 7) # i.e. ckan.activity_list_limit_max + + +class TestUserActivityList(helpers.FunctionalTestBase): + def _create_bulk_user_activities(self, count): + user = factories.User() + from ckan import model + objs = [model.Activity( + user_id=user['id'], object_id=None, revision_id=None, + activity_type=None, data=None) + for i in range(count)] + model.Session.add_all(objs) + model.repo.commit_and_remove() + return user['id'] + + def test_limit_default(self): + id = self._create_bulk_user_activities(35) + results = logic.get_action('user_activity_list')({}, {'id': id}) + eq(len(results), 31) # i.e. default value + + @helpers.change_config('ckan.activity_list_limit', '5') + def test_limit_configured(self): + id = self._create_bulk_user_activities(7) + results = logic.get_action('user_activity_list')({}, {'id': id}) + eq(len(results), 5) # i.e. ckan.activity_list_limit + + @helpers.change_config('ckan.activity_list_limit', '5') + @helpers.change_config('ckan.activity_list_limit_max', '7') + def test_limit_hits_max(self): + id = self._create_bulk_user_activities(9) + results = logic.get_action('user_activity_list')( + {}, {'id': id, 'limit': '9'}) + eq(len(results), 7) # i.e. ckan.activity_list_limit_max + + +class TestGroupActivityList(helpers.FunctionalTestBase): + def _create_bulk_group_activities(self, count): + group = factories.Group() + from ckan import model + objs = [model.Activity( + user_id=None, object_id=group['id'], revision_id=None, + activity_type=None, data=None) + for i in range(count)] + model.Session.add_all(objs) + model.repo.commit_and_remove() + return group['id'] + + def test_limit_default(self): + id = self._create_bulk_group_activities(35) + results = logic.get_action('group_activity_list')({}, {'id': id}) + eq(len(results), 31) # i.e. default value + + @helpers.change_config('ckan.activity_list_limit', '5') + def test_limit_configured(self): + id = self._create_bulk_group_activities(7) + results = logic.get_action('group_activity_list')({}, {'id': id}) + eq(len(results), 5) # i.e. ckan.activity_list_limit + + @helpers.change_config('ckan.activity_list_limit', '5') + @helpers.change_config('ckan.activity_list_limit_max', '7') + def test_limit_hits_max(self): + id = self._create_bulk_group_activities(9) + results = logic.get_action('group_activity_list')( + {}, {'id': id, 'limit': '9'}) + eq(len(results), 7) # i.e. ckan.activity_list_limit_max + + +class TestOrganizationActivityList(helpers.FunctionalTestBase): + def _create_bulk_org_activities(self, count): + org = factories.Organization() + from ckan import model + objs = [model.Activity( + user_id=None, object_id=org['id'], revision_id=None, + activity_type=None, data=None) + for i in range(count)] + model.Session.add_all(objs) + model.repo.commit_and_remove() + return org['id'] + + def test_limit_default(self): + id = self._create_bulk_org_activities(35) + results = logic.get_action('organization_activity_list')({}, {'id': id}) + eq(len(results), 31) # i.e. default value + + @helpers.change_config('ckan.activity_list_limit', '5') + def test_limit_configured(self): + id = self._create_bulk_org_activities(7) + results = logic.get_action('organization_activity_list')({}, {'id': id}) + eq(len(results), 5) # i.e. ckan.activity_list_limit + + @helpers.change_config('ckan.activity_list_limit', '5') + @helpers.change_config('ckan.activity_list_limit_max', '7') + def test_limit_hits_max(self): + id = self._create_bulk_org_activities(9) + results = logic.get_action('organization_activity_list')( + {}, {'id': id, 'limit': '9'}) + eq(len(results), 7) # i.e. ckan.activity_list_limit_max + + +class TestRecentlyChangedPackagesActivityList(helpers.FunctionalTestBase): + def _create_bulk_package_activities(self, count): + from ckan import model + objs = [model.Activity( + user_id=None, object_id=None, revision_id=None, + activity_type='new_package', data=None) + for i in range(count)] + model.Session.add_all(objs) + model.repo.commit_and_remove() + + def test_limit_default(self): + self._create_bulk_package_activities(35) + results = logic.get_action('recently_changed_packages_activity_list')({}, {}) + eq(len(results), 31) # i.e. default value + + @helpers.change_config('ckan.activity_list_limit', '5') + def test_limit_configured(self): + self._create_bulk_package_activities(7) + results = logic.get_action('recently_changed_packages_activity_list')({}, {}) + eq(len(results), 5) # i.e. ckan.activity_list_limit + + @helpers.change_config('ckan.activity_list_limit', '5') + @helpers.change_config('ckan.activity_list_limit_max', '7') + def test_limit_hits_max(self): + self._create_bulk_package_activities(9) + results = logic.get_action('recently_changed_packages_activity_list')( + {}, {'limit': '9'}) + eq(len(results), 7) # i.e. ckan.activity_list_limit_max \ No newline at end of file diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index 0bfcb425bae..fbdbf1b4510 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -1561,10 +1561,22 @@ Example:: ckan.activity_list_limit = 31 -Default value: ``infinite`` +Default value: ``31`` + +This controls the number of activities to show in the Activity Stream. + +.. _ckan.activity_list_limit_max: + +ckan.activity_list_limit_max +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Example:: + + ckan.activity_list_limit_max = 100 -This controls the number of activities to show in the Activity Stream. By default, it shows everything. +Default value: ``100`` +Maximum allowed value for Activity Stream ``limit`` parameter. .. _ckan.email_notifications_since: diff --git a/test-core.ini b/test-core.ini index 43feb67893c..879ee5547b3 100644 --- a/test-core.ini +++ b/test-core.ini @@ -92,8 +92,6 @@ ckan.datasets_per_page = 20 ckan.activity_streams_email_notifications = True -ckan.activity_list_limit = 15 - ckan.tracking_enabled = true beaker.session.key = ckan From 4e6f42803fac5d4b5caee6e4b93294ae14fd14a3 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 17:33:29 +0100 Subject: [PATCH 42/99] Using the schema to do the default & limit, instead of code. declarative. --- ckan/lib/navl/validators.py | 12 +++++++++++- ckan/lib/search/query.py | 2 +- ckan/logic/schema.py | 5 +++-- ckan/tests/logic/action/test_get.py | 19 +++++++++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/ckan/lib/navl/validators.py b/ckan/lib/navl/validators.py index 172da15316d..6af970f1a11 100644 --- a/ckan/lib/navl/validators.py +++ b/ckan/lib/navl/validators.py @@ -4,7 +4,7 @@ import ckan.lib.navl.dictization_functions as df -from ckan.common import _, json +from ckan.common import _, json, config missing = df.missing StopOnError = df.StopOnError @@ -163,3 +163,13 @@ def unicode_safe(value): return text_type(value) except Exception: return u'\N{REPLACEMENT CHARACTER}' + +def limit_to_configured_maximum(config_option, default_limit): + def callable(key, data, errors, context): + + value = data.get(key) + limit = int(config.get(config_option, default_limit)) + if value > limit: + data[key] = limit + + return callable diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 599125bbe54..6f61f3458d3 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -312,7 +312,7 @@ def run(self, query, permission_labels=None, **kwargs): query['q'] = "*:*" # number of results - rows_to_return = min(1000, int(query.get('rows', 10))) + rows_to_return = query['rows'] # defaulted & made an int by schema if rows_to_return > 0: # #1683 Work around problem of last result being out of order # in SOLR 1.4 diff --git a/ckan/logic/schema.py b/ckan/logic/schema.py index d01836131c2..d244d5ce8d8 100644 --- a/ckan/logic/schema.py +++ b/ckan/logic/schema.py @@ -627,12 +627,13 @@ def default_autocomplete_schema( def default_package_search_schema( ignore_missing, unicode_safe, list_of_strings, natural_number_validator, int_validator, convert_to_json_if_string, - convert_to_list_if_string): + convert_to_list_if_string, limit_to_configured_maximum, default): return { 'q': [ignore_missing, unicode_safe], 'fl': [ignore_missing, convert_to_list_if_string], 'fq': [ignore_missing, unicode_safe], - 'rows': [ignore_missing, natural_number_validator], + 'rows': [default(10), natural_number_validator, + limit_to_configured_maximum('ckan.search.rows_max', 1000)], 'sort': [ignore_missing, unicode_safe], 'start': [ignore_missing, natural_number_validator], 'qf': [ignore_missing, unicode_safe], diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 70a85384a8e..69838752ecf 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -892,6 +892,25 @@ def test_bad_solr_parameter(self): # SOLR error is 'Missing sort order' or 'Missing_sort_order', # depending on the solr version. + def _create_bulk_datasets(self, name, count): + from ckan import model + model.repo.new_revision() + pkgs = [model.Package(name='{}_{}'.format(name, i)) + for i in range(count)] + model.Session.add_all(pkgs) + model.repo.commit_and_remove() + + def test_rows_returned_default(self): + self._create_bulk_datasets('rows_default', 11) + results = logic.get_action('package_search')({}, {}) + eq(len(results['results']), 10) # i.e. 'rows' default value + + @helpers.change_config('ckan.search.rows_max', '12') + def test_rows_returned_limited(self): + self._create_bulk_datasets('rows_limited', 14) + results = logic.get_action('package_search')({}, {'rows': '15'}) + eq(len(results['results']), 12) # i.e. ckan.search.rows_max + def test_facets(self): org = factories.Organization(name='test-org-facet', title='Test Org') factories.Dataset(owner_org=org['id']) From d28a38daf7cfc77b8963b2050eae1a7798a523ba Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 18:14:53 +0100 Subject: [PATCH 43/99] Convert limits to validators. Using default_pagination_schema seemed to be a slip so I fixed. --- ckan/lib/navl/validators.py | 10 ++++++ ckan/logic/action/get.py | 34 ++++++--------------- ckan/logic/schema.py | 17 +++++++++-- ckan/tests/logic/action/test_get.py | 47 ++++++++++++++++------------- 4 files changed, 59 insertions(+), 49 deletions(-) diff --git a/ckan/lib/navl/validators.py b/ckan/lib/navl/validators.py index 6af970f1a11..6022722ce21 100644 --- a/ckan/lib/navl/validators.py +++ b/ckan/lib/navl/validators.py @@ -85,6 +85,16 @@ def callable(key, data, errors, context): return callable +def configured_default(config_name, default_value_if_not_configured): + '''When key is missing or value is an empty string or None, replace it with + a default value from config, or if that isn't set from the + default_value_if_not_configured.''' + + default_value = config.get(config_name) + if default_value is None: + default_value = default_value_if_not_configured + return default(default_value) + def ignore_missing(key, data, errors, context): '''If the key is missing from the data, ignore the rest of the key's schema. diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index e554617d476..06cd99f87dd 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -2482,10 +2482,7 @@ def user_activity_list(context, data_dict): raise logic.NotFound offset = data_dict.get('offset', 0) - limit = min( - int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), - int(config.get('ckan.activity_list_limit_max', 100)) - ) + limit = data_dict['limit'] # defaulted, limited & made an int by schema _activity_objects = model.activity.user_activity_list(user.id, limit=limit, offset=offset) @@ -2527,10 +2524,7 @@ def package_activity_list(context, data_dict): raise logic.NotFound offset = int(data_dict.get('offset', 0)) - limit = min( - int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), - int(config.get('ckan.activity_list_limit_max', 100)) - ) + limit = data_dict['limit'] # defaulted, limited & made an int by schema _activity_objects = model.activity.package_activity_list(package.id, limit=limit, offset=offset) @@ -2567,10 +2561,7 @@ def group_activity_list(context, data_dict): model = context['model'] group_id = data_dict.get('id') offset = data_dict.get('offset', 0) - limit = min( - int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), - int(config.get('ckan.activity_list_limit_max', 100)) - ) + limit = data_dict['limit'] # defaulted, limited & made an int by schema # Convert group_id (could be id or name) into id. group_show = logic.get_action('group_show') @@ -2609,10 +2600,7 @@ def organization_activity_list(context, data_dict): model = context['model'] org_id = data_dict.get('id') offset = data_dict.get('offset', 0) - limit = min( - int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), - int(config.get('ckan.activity_list_limit_max', 100)) - ) + limit = data_dict['limit'] # defaulted, limited & made an int by schema # Convert org_id (could be id or name) into id. org_show = logic.get_action('organization_show') @@ -2626,7 +2614,7 @@ def organization_activity_list(context, data_dict): return model_dictize.activity_list_dictize(activity_objects, context) -@logic.validate(logic.schema.default_pagination_schema) +@logic.validate(logic.schema.default_dashboard_activity_list_schema) def recently_changed_packages_activity_list(context, data_dict): '''Return the activity stream of all recently added or changed packages. @@ -2646,10 +2634,7 @@ def recently_changed_packages_activity_list(context, data_dict): # authorized to read. model = context['model'] offset = data_dict.get('offset', 0) - limit = min( - int(data_dict.get('limit', config.get('ckan.activity_list_limit', 31))), - int(config.get('ckan.activity_list_limit_max', 100)) - ) + limit = data_dict['limit'] # defaulted, limited & made an int by schema _activity_objects = model.activity.recently_changed_packages_activity_list( limit=limit, offset=offset) @@ -3309,7 +3294,7 @@ def _group_or_org_followee_list(context, data_dict, is_org=False): return [model_dictize.group_dictize(group, context) for group in groups] -@logic.validate(logic.schema.default_pagination_schema) +@logic.validate(logic.schema.default_dashboard_activity_list_schema) def dashboard_activity_list(context, data_dict): '''Return the authorized (via login or API key) user's dashboard activity stream. @@ -3337,8 +3322,7 @@ def dashboard_activity_list(context, data_dict): model = context['model'] user_id = model.User.get(context['user']).id offset = data_dict.get('offset', 0) - limit = int( - data_dict.get('limit', config.get('ckan.activity_list_limit', 31))) + limit = data_dict['limit'] # defaulted, limited & made an int by schema # FIXME: Filter out activities whose subject or object the user is not # authorized to read. @@ -3365,7 +3349,7 @@ def dashboard_activity_list(context, data_dict): return activity_dicts -@logic.validate(ckan.logic.schema.default_pagination_schema) +@logic.validate(ckan.logic.schema.default_dashboard_activity_list_schema) def dashboard_activity_list_html(context, data_dict): '''Return the authorized (via login or API key) user's dashboard activity stream as HTML. diff --git a/ckan/logic/schema.py b/ckan/logic/schema.py index d244d5ce8d8..7aa77fdd21d 100644 --- a/ckan/logic/schema.py +++ b/ckan/logic/schema.py @@ -601,16 +601,27 @@ def default_pagination_schema(ignore_missing, natural_number_validator): @validator_args -def default_dashboard_activity_list_schema(unicode_safe): +def default_dashboard_activity_list_schema( + configured_default, natural_number_validator, + limit_to_configured_maximum): schema = default_pagination_schema() - schema['id'] = [unicode_safe] + schema['limit'] = [ + configured_default('ckan.activity_list_limit', 31), + natural_number_validator, + limit_to_configured_maximum('ckan.activity_list_limit_max', 100)] return schema @validator_args -def default_activity_list_schema(not_missing, unicode_safe): +def default_activity_list_schema( + not_missing, unicode_safe, configured_default, + natural_number_validator, limit_to_configured_maximum): schema = default_pagination_schema() schema['id'] = [not_missing, unicode_safe] + schema['limit'] = [ + configured_default('ckan.activity_list_limit', 31), + natural_number_validator, + limit_to_configured_maximum('ckan.activity_list_limit_max', 100)] return schema diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index a6f6439af77..9c9ba74e6fa 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -2253,10 +2253,11 @@ class TestPackageActivityList(helpers.FunctionalTestBase): def _create_bulk_package_activities(self, count): dataset = factories.Dataset() from ckan import model - objs = [model.Activity( - user_id=None, object_id=dataset['id'], revision_id=None, - activity_type=None, data=None) - for i in range(count)] + objs = [ + model.Activity( + user_id=None, object_id=dataset['id'], revision_id=None, + activity_type=None, data=None) + for i in range(count)] model.Session.add_all(objs) model.repo.commit_and_remove() return dataset['id'] @@ -2285,10 +2286,11 @@ class TestUserActivityList(helpers.FunctionalTestBase): def _create_bulk_user_activities(self, count): user = factories.User() from ckan import model - objs = [model.Activity( - user_id=user['id'], object_id=None, revision_id=None, - activity_type=None, data=None) - for i in range(count)] + objs = [ + model.Activity( + user_id=user['id'], object_id=None, revision_id=None, + activity_type=None, data=None) + for i in range(count)] model.Session.add_all(objs) model.repo.commit_and_remove() return user['id'] @@ -2317,10 +2319,11 @@ class TestGroupActivityList(helpers.FunctionalTestBase): def _create_bulk_group_activities(self, count): group = factories.Group() from ckan import model - objs = [model.Activity( - user_id=None, object_id=group['id'], revision_id=None, - activity_type=None, data=None) - for i in range(count)] + objs = [ + model.Activity( + user_id=None, object_id=group['id'], revision_id=None, + activity_type=None, data=None) + for i in range(count)] model.Session.add_all(objs) model.repo.commit_and_remove() return group['id'] @@ -2349,10 +2352,11 @@ class TestOrganizationActivityList(helpers.FunctionalTestBase): def _create_bulk_org_activities(self, count): org = factories.Organization() from ckan import model - objs = [model.Activity( - user_id=None, object_id=org['id'], revision_id=None, - activity_type=None, data=None) - for i in range(count)] + objs = [ + model.Activity( + user_id=None, object_id=org['id'], revision_id=None, + activity_type=None, data=None) + for i in range(count)] model.Session.add_all(objs) model.repo.commit_and_remove() return org['id'] @@ -2380,10 +2384,11 @@ def test_limit_hits_max(self): class TestRecentlyChangedPackagesActivityList(helpers.FunctionalTestBase): def _create_bulk_package_activities(self, count): from ckan import model - objs = [model.Activity( - user_id=None, object_id=None, revision_id=None, - activity_type='new_package', data=None) - for i in range(count)] + objs = [ + model.Activity( + user_id=None, object_id=None, revision_id=None, + activity_type='new_package', data=None) + for i in range(count)] model.Session.add_all(objs) model.repo.commit_and_remove() @@ -2404,4 +2409,4 @@ def test_limit_hits_max(self): self._create_bulk_package_activities(9) results = logic.get_action('recently_changed_packages_activity_list')( {}, {'limit': '9'}) - eq(len(results), 7) # i.e. ckan.activity_list_limit_max \ No newline at end of file + eq(len(results), 7) # i.e. ckan.activity_list_limit_max From 72b249802f56017750069df4f608eb5640e816a7 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 19:54:06 +0100 Subject: [PATCH 44/99] Fix tests --- ckan/controllers/api.py | 2 +- ckan/lib/search/query.py | 4 +- ckan/logic/action/get.py | 40 +++++++++---------- ckan/tests/legacy/functional/test_activity.py | 2 +- 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/ckan/controllers/api.py b/ckan/controllers/api.py index f7e4c0b6023..acef129ab25 100644 --- a/ckan/controllers/api.py +++ b/ckan/controllers/api.py @@ -22,7 +22,7 @@ from ckan.views import identify_user -from ckan.common import _, c, request, response +from ckan.common import _, c, request, response, config log = logging.getLogger(__name__) diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 6f61f3458d3..68877959982 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -312,7 +312,9 @@ def run(self, query, permission_labels=None, **kwargs): query['q'] = "*:*" # number of results - rows_to_return = query['rows'] # defaulted & made an int by schema + rows_to_return = int(query.get('rows', 10)) + # query['rows'] should be a defaulted int, due to schema, but make + # certain, for legacy tests if rows_to_return > 0: # #1683 Work around problem of last result being out of order # in SOLR 1.4 diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 06cd99f87dd..8fcc97b67e9 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -2463,8 +2463,8 @@ def user_activity_list(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2505,8 +2505,8 @@ def package_activity_list(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2547,8 +2547,8 @@ def group_activity_list(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2586,8 +2586,8 @@ def organization_activity_list(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2623,8 +2623,8 @@ def recently_changed_packages_activity_list(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of dictionaries @@ -2674,8 +2674,8 @@ def user_activity_list_html(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2706,8 +2706,8 @@ def package_activity_list_html(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2738,8 +2738,8 @@ def group_activity_list_html(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2770,8 +2770,8 @@ def organization_activity_list_html(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string @@ -2802,8 +2802,8 @@ def recently_changed_packages_activity_list_html(context, data_dict): :type offset: int :param limit: the maximum number of activities to return (optional, default: ``31`` unless set in site's configuration - ``ckan.activity_list_limit``, upper limit: ``100`` unless set in - site's configuration ``ckan.activity_list_limit_max``) + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: string diff --git a/ckan/tests/legacy/functional/test_activity.py b/ckan/tests/legacy/functional/test_activity.py index 5a5d9aabc12..f7db62f3dcd 100644 --- a/ckan/tests/legacy/functional/test_activity.py +++ b/ckan/tests/legacy/functional/test_activity.py @@ -37,7 +37,7 @@ def setup(cls): def teardown(cls): ckan.model.repo.rebuild_db() - + @helpers.change_config('ckan.activity_list_limit', '15') def test_user_activity(self): """Test user activity streams HTML rendering.""" From 8729f8f0bca02f74d8b68195d9c904a0ece7ce23 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 20:15:52 +0100 Subject: [PATCH 45/99] Add limit to dashboard_activity_list --- ckan/logic/action/get.py | 5 ++- ckan/tests/logic/action/test_get.py | 70 ++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 8fcc97b67e9..04cb5354922 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -3310,8 +3310,9 @@ def dashboard_activity_list(context, data_dict): (optional, default: ``0``) :type offset: int :param limit: the maximum number of activities to return - (optional, default: ``31``, the default value is configurable via the - :ref:`ckan.activity_list_limit` setting) + (optional, default: ``31`` unless set in site's configuration + ``ckan.activity_list_limit``, upper limit: ``100`` unless set in + site's configuration ``ckan.activity_list_limit_max``) :type limit: int :rtype: list of activity dictionaries diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 9c9ba74e6fa..d0d70c0befa 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -2264,21 +2264,20 @@ def _create_bulk_package_activities(self, count): def test_limit_default(self): id = self._create_bulk_package_activities(35) - results = logic.get_action('package_activity_list')({}, {'id': id}) + results = helpers.call_action('package_activity_list', id=id) eq(len(results), 31) # i.e. default value @helpers.change_config('ckan.activity_list_limit', '5') def test_limit_configured(self): id = self._create_bulk_package_activities(7) - results = logic.get_action('package_activity_list')({}, {'id': id}) + results = helpers.call_action('package_activity_list', id=id) eq(len(results), 5) # i.e. ckan.activity_list_limit @helpers.change_config('ckan.activity_list_limit', '5') @helpers.change_config('ckan.activity_list_limit_max', '7') def test_limit_hits_max(self): id = self._create_bulk_package_activities(9) - results = logic.get_action('package_activity_list')( - {}, {'id': id, 'limit': '9'}) + results = helpers.call_action('package_activity_list', id=id, limit='9') eq(len(results), 7) # i.e. ckan.activity_list_limit_max @@ -2297,21 +2296,20 @@ def _create_bulk_user_activities(self, count): def test_limit_default(self): id = self._create_bulk_user_activities(35) - results = logic.get_action('user_activity_list')({}, {'id': id}) + results = helpers.call_action('user_activity_list', id=id) eq(len(results), 31) # i.e. default value @helpers.change_config('ckan.activity_list_limit', '5') def test_limit_configured(self): id = self._create_bulk_user_activities(7) - results = logic.get_action('user_activity_list')({}, {'id': id}) + results = helpers.call_action('user_activity_list', id=id) eq(len(results), 5) # i.e. ckan.activity_list_limit @helpers.change_config('ckan.activity_list_limit', '5') @helpers.change_config('ckan.activity_list_limit_max', '7') def test_limit_hits_max(self): id = self._create_bulk_user_activities(9) - results = logic.get_action('user_activity_list')( - {}, {'id': id, 'limit': '9'}) + results = helpers.call_action('user_activity_list', id=id, limit='9') eq(len(results), 7) # i.e. ckan.activity_list_limit_max @@ -2330,21 +2328,20 @@ def _create_bulk_group_activities(self, count): def test_limit_default(self): id = self._create_bulk_group_activities(35) - results = logic.get_action('group_activity_list')({}, {'id': id}) + results = helpers.call_action('group_activity_list', id=id) eq(len(results), 31) # i.e. default value @helpers.change_config('ckan.activity_list_limit', '5') def test_limit_configured(self): id = self._create_bulk_group_activities(7) - results = logic.get_action('group_activity_list')({}, {'id': id}) + results = helpers.call_action('group_activity_list', id=id) eq(len(results), 5) # i.e. ckan.activity_list_limit @helpers.change_config('ckan.activity_list_limit', '5') @helpers.change_config('ckan.activity_list_limit_max', '7') def test_limit_hits_max(self): id = self._create_bulk_group_activities(9) - results = logic.get_action('group_activity_list')( - {}, {'id': id, 'limit': '9'}) + results = helpers.call_action('group_activity_list', id=id, limit='9') eq(len(results), 7) # i.e. ckan.activity_list_limit_max @@ -2363,21 +2360,20 @@ def _create_bulk_org_activities(self, count): def test_limit_default(self): id = self._create_bulk_org_activities(35) - results = logic.get_action('organization_activity_list')({}, {'id': id}) + results = helpers.call_action('organization_activity_list', id=id) eq(len(results), 31) # i.e. default value @helpers.change_config('ckan.activity_list_limit', '5') def test_limit_configured(self): id = self._create_bulk_org_activities(7) - results = logic.get_action('organization_activity_list')({}, {'id': id}) + results = helpers.call_action('organization_activity_list', id=id) eq(len(results), 5) # i.e. ckan.activity_list_limit @helpers.change_config('ckan.activity_list_limit', '5') @helpers.change_config('ckan.activity_list_limit_max', '7') def test_limit_hits_max(self): id = self._create_bulk_org_activities(9) - results = logic.get_action('organization_activity_list')( - {}, {'id': id, 'limit': '9'}) + results = helpers.call_action('organization_activity_list', id=id, limit='9') eq(len(results), 7) # i.e. ckan.activity_list_limit_max @@ -2394,19 +2390,53 @@ def _create_bulk_package_activities(self, count): def test_limit_default(self): self._create_bulk_package_activities(35) - results = logic.get_action('recently_changed_packages_activity_list')({}, {}) + results = helpers.call_action('recently_changed_packages_activity_list') eq(len(results), 31) # i.e. default value @helpers.change_config('ckan.activity_list_limit', '5') def test_limit_configured(self): self._create_bulk_package_activities(7) - results = logic.get_action('recently_changed_packages_activity_list')({}, {}) + results = helpers.call_action('recently_changed_packages_activity_list') eq(len(results), 5) # i.e. ckan.activity_list_limit @helpers.change_config('ckan.activity_list_limit', '5') @helpers.change_config('ckan.activity_list_limit_max', '7') def test_limit_hits_max(self): self._create_bulk_package_activities(9) - results = logic.get_action('recently_changed_packages_activity_list')( - {}, {'limit': '9'}) + results = helpers.call_action('recently_changed_packages_activity_list', limit='9') + eq(len(results), 7) # i.e. ckan.activity_list_limit_max + + +class TestDashboardActivityList(helpers.FunctionalTestBase): + def _create_bulk_package_activities(self, count): + user = factories.User() + from ckan import model + objs = [ + model.Activity( + user_id=user['id'], object_id=None, revision_id=None, + activity_type=None, data=None) + for i in range(count)] + model.Session.add_all(objs) + model.repo.commit_and_remove() + return user['id'] + + def test_limit_default(self): + id = self._create_bulk_package_activities(35) + results = helpers.call_action('dashboard_activity_list', + context={'user': id}) + eq(len(results), 31) # i.e. default value + + @helpers.change_config('ckan.activity_list_limit', '5') + def test_limit_configured(self): + id = self._create_bulk_package_activities(7) + results = helpers.call_action('dashboard_activity_list', + context={'user': id}) + eq(len(results), 5) # i.e. ckan.activity_list_limit + + @helpers.change_config('ckan.activity_list_limit', '5') + @helpers.change_config('ckan.activity_list_limit_max', '7') + def test_limit_hits_max(self): + id = self._create_bulk_package_activities(9) + results = helpers.call_action('dashboard_activity_list', limit='9', + context={'user': id}) eq(len(results), 7) # i.e. ckan.activity_list_limit_max From bd01afe8139eb8cb75277503089b96a8247beb7e Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 21:10:15 +0100 Subject: [PATCH 46/99] Limit group/organization_show --- ckan/lib/dictization/model_dictize.py | 5 +++-- ckan/lib/navl/validators.py | 5 +++++ .../lib/dictization/test_model_dictize.py | 13 +++++++++++ ckan/tests/logic/action/test_get.py | 22 +++++++++++++++++++ doc/maintaining/configuration.rst | 5 ++++- 5 files changed, 47 insertions(+), 3 deletions(-) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 23b6f5ae524..18017352134 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -388,11 +388,12 @@ def get_packages_for_this_group(group_, just_the_count=False): q['include_private'] = True if not just_the_count: - # Is there a packages limit in the context? + # package_search limits 'rows' anyway, so this is only if you + # want even fewer try: packages_limit = context['limits']['packages'] except KeyError: - q['rows'] = 1000 # Only the first 1000 datasets are returned + del q['rows'] # leave it to package_search to limit it else: q['rows'] = packages_limit diff --git a/ckan/lib/navl/validators.py b/ckan/lib/navl/validators.py index 6022722ce21..a88146b1cb9 100644 --- a/ckan/lib/navl/validators.py +++ b/ckan/lib/navl/validators.py @@ -175,6 +175,11 @@ def unicode_safe(value): return u'\N{REPLACEMENT CHARACTER}' def limit_to_configured_maximum(config_option, default_limit): + ''' + If the value is over a limit, it changes it to the limit. The limit is + defined by a configuration option, or if that is not set, a given int + default_limit. + ''' def callable(key, data, errors, context): value = data.get(key) diff --git a/ckan/tests/lib/dictization/test_model_dictize.py b/ckan/tests/lib/dictization/test_model_dictize.py index 43de44788c1..011391e8db9 100644 --- a/ckan/tests/lib/dictization/test_model_dictize.py +++ b/ckan/tests/lib/dictization/test_model_dictize.py @@ -240,6 +240,19 @@ def test_group_dictize_with_package_list_limited_over(self): assert_equal(len(group['packages']), 3) + @helpers.change_config('ckan.search.rows_max', '4') + def test_group_dictize_with_package_list_limited_by_config(self): + group_ = factories.Group() + for _ in range(5): + factories.Dataset(groups=[{'name': group_['name']}]) + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session} + + group = model_dictize.group_dictize(group_obj, context) + + assert_equal(len(group['packages']), 4) + # limited by ckan.search.rows_max + def test_group_dictize_with_package_count(self): # group_list_dictize calls it like this by default group_ = factories.Group() diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index d0d70c0befa..e4a975fe36c 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -393,6 +393,17 @@ def test_group_show_does_not_show_private_datasets(self): in group['packages']], ( "group_show() should never show private datasets") + @helpers.change_config('ckan.search.rows_max', '5') + def test_package_limit_configured(self): + group = factories.Group() + for i in range(7): + factories.Dataset(groups=[{'id': group['id']}]) + id = group['id'] + + results = helpers.call_action('group_show', id=id, + include_datasets=1) + eq(len(results['packages']), 5) # i.e. ckan.search.rows_max + class TestOrganizationList(helpers.FunctionalTestBase): @@ -522,6 +533,17 @@ def test_organization_show_private_packages_not_returned(self): assert org_dict['packages'][0]['name'] == 'dataset_1' assert org_dict['package_count'] == 1 + @helpers.change_config('ckan.search.rows_max', '5') + def test_package_limit_configured(self): + org = factories.Organization() + for i in range(7): + factories.Dataset(owner_org=org['id']) + id = org['id'] + + results = helpers.call_action('organization_show', id=id, + include_datasets=1) + eq(len(results['packages']), 5) # i.e. ckan.search.rows_max + class TestUserList(helpers.FunctionalTestBase): diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index fbdbf1b4510..f7836500a0e 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -745,7 +745,10 @@ Example:: Default value: ``1000`` -Maximum allowed value for ``package_search``'s ``rows`` parameter. +Maximum allowed value for rows returned. Specifically this limits: + +* ``package_search``'s ``rows`` parameter +* ``group_show`` and ``organization_show``'s number of datasets returned when specifying ``include_datasets=true`` Redis Settings --------------- From 10347c72855103d9cca8d6ad5742afba832d5ed5 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 22:37:21 +0100 Subject: [PATCH 47/99] Add limit for group/organization_list when all_fields is specified --- ckan/logic/action/get.py | 12 ++++++ ckan/model/group.py | 4 +- .../legacy/functional/api/test_dashboard.py | 1 + ckan/tests/logic/action/test_get.py | 38 +++++++++++++++++++ doc/maintaining/configuration.rst | 16 ++++++++ 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 04cb5354922..cf2c2745dbd 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -342,6 +342,12 @@ def _group_or_org_list(context, data_dict, is_org=False): all_fields = asbool(data_dict.get('all_fields', None)) + # all_fields is really computationally expensive, so need a tight limit + if all_fields: + max_limit = config.get('ckan.search.group_and_org_all_fields_max', 25) + if limit is None or limit > max_limit: + limit = max_limit + # order_by deprecated in ckan 1.8 # if it is supplied and sort isn't use order_by and raise a warning order_by = data_dict.get('order_by', '') @@ -450,6 +456,9 @@ def group_list(context, data_dict): :type groups: list of strings :param all_fields: return group dictionaries instead of just names. Only core fields are returned - get some more using the include_* options. + Because ``all_fields`` is computationally expensive, using it will set + the ``limit`` to a max of ``25`` or whatever the site has configured: + ``ckan.search.group_and_org_all_fields_max``. Returning a list of packages is too expensive, so the `packages` property for each group is deprecated, but there is a count of the packages in the `package_count` property. @@ -500,6 +509,9 @@ def organization_list(context, data_dict): :type organizations: list of strings :param all_fields: return group dictionaries instead of just names. Only core fields are returned - get some more using the include_* options. + Because ``all_fields`` is computationally expensive, using it will set + the ``limit`` to a max of ``25`` or whatever the site has configured: + ``ckan.search.group_and_org_all_fields_max``. Returning a list of packages is too expensive, so the `packages` property for each group is deprecated, but there is a count of the packages in the `package_count` property. diff --git a/ckan/model/group.py b/ckan/model/group.py index 18fe6a16cdf..469d0669329 100644 --- a/ckan/model/group.py +++ b/ckan/model/group.py @@ -116,13 +116,15 @@ class Group(vdm.sqlalchemy.RevisionedObjectMixin, domain_object.DomainObject): def __init__(self, name=u'', title=u'', description=u'', image_url=u'', - type=u'group', approval_status=u'approved'): + type=u'group', approval_status=u'approved', + is_organization=False): self.name = name self.title = title self.description = description self.image_url = image_url self.type = type self.approval_status = approval_status + self.is_organization = is_organization @property def display_name(self): diff --git a/ckan/tests/legacy/functional/api/test_dashboard.py b/ckan/tests/legacy/functional/api/test_dashboard.py index d1fa99dcf69..11e7d33eae7 100644 --- a/ckan/tests/legacy/functional/api/test_dashboard.py +++ b/ckan/tests/legacy/functional/api/test_dashboard.py @@ -311,6 +311,7 @@ def test_07_mark_new_activities_as_read(self): assert self.dashboard_new_activities_count(self.new_user) == 0 assert len(self.dashboard_new_activities(self.new_user)) == 0 + @helpers.change_config('ckan.activity_list_limit', '15') def test_08_maximum_number_of_new_activities(self): '''Test that the new activities count does not go higher than 15, even if there are more than 15 new activities from the user's followers.''' diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index e4a975fe36c..914396000c5 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -163,6 +163,24 @@ def test_group_list_all_fields(self): assert 'users' not in group_list[0] assert 'datasets' not in group_list[0] + def _create_bulk_groups(self, name, count): + from ckan import model + model.repo.new_revision() + groups = [model.Group(name='{}_{}'.format(name, i)) + for i in range(count)] + model.Session.add_all(groups) + model.repo.commit_and_remove() + + def test_no_default_limit(self): + self._create_bulk_groups('group_default', 30) + results = helpers.call_action('group_list') + eq(len(results), 30) # i.e. not limited + + def test_all_fields_limit(self): + self._create_bulk_groups('group_all_fields_default', 30) + results = helpers.call_action('group_list', all_fields=True) + eq(len(results), 25) # i.e. default value + def test_group_list_extras_returned(self): group = factories.Group(extras=[{'key': 'key1', 'value': 'val1'}]) @@ -462,6 +480,26 @@ def test_organization_list_return_custom_organization_type(self): assert (sorted(org_list) == sorted([g['name'] for g in [org2]])), '{}'.format(org_list) + def _create_bulk_orgs(self, name, count): + from ckan import model + model.repo.new_revision() + orgs = [model.Group(name='{}_{}'.format(name, i), is_organization=True, + type='organization') + for i in range(count)] + model.Session.add_all(orgs) + model.repo.commit_and_remove() + + def test_no_default_limit(self): + self._create_bulk_orgs('org_default', 30) + results = helpers.call_action('organization_list') + eq(len(results), 30) # i.e. not limited + + def test_all_fields_limit(self): + self._create_bulk_orgs('org_all_fields_default', 30) + results = helpers.call_action('organization_list', all_fields=True) + eq(len(results), 25) # i.e. default value + + class TestOrganizationShow(helpers.FunctionalTestBase): diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index f7836500a0e..c0ffc9eaab3 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -750,6 +750,22 @@ Maximum allowed value for rows returned. Specifically this limits: * ``package_search``'s ``rows`` parameter * ``group_show`` and ``organization_show``'s number of datasets returned when specifying ``include_datasets=true`` +.. _ckan.search.group_and_org_all_fields_max: + +ckan.search.group_and_org_all_fields_max +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Example:: + + ckan.search.group_and_org_all_fields_max = 100 + +Default value: ``25`` + +Maximum number of groups/organizations returned when listing them in detail. Specifically this limits: + +* ``group_list``'s ``limit`` when ``all_fields=true`` +* ``organization_list``'s ``limit`` when ``all_fields=true`` + Redis Settings --------------- From f7f081ec7eb014674b8546b7a6c29fd619db7bb9 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 23:05:07 +0100 Subject: [PATCH 48/99] Add limit to group/org_list when not all_fields --- CHANGELOG.rst | 5 ++++ ckan/logic/action/get.py | 15 ++++++---- ckan/tests/logic/action/test_get.py | 43 ++++++++++++++++++++++------- doc/maintaining/configuration.rst | 24 +++++++++++++--- 4 files changed, 67 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 34b50544daa..35599192526 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,6 +14,11 @@ Minor changes: * For navl schemas, the 'default' validator no longer applies the default when the value is False, 0, [] or {} (#4448) + * If you've customized the schema for package_search, you'll need to add to it + the limiting of ``row``, as per default_package_search_schema now does (#4484) + * Several logic functions now have new limits to how many items can be + returned, notably ``group_list`` and ``organization_list`` when + ``all_fields=true``. These are all configurable. (#4484) Bugfixes: diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index cf2c2745dbd..f6564130cfc 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -342,11 +342,14 @@ def _group_or_org_list(context, data_dict, is_org=False): all_fields = asbool(data_dict.get('all_fields', None)) - # all_fields is really computationally expensive, so need a tight limit if all_fields: - max_limit = config.get('ckan.search.group_and_org_all_fields_max', 25) - if limit is None or limit > max_limit: - limit = max_limit + # all_fields is really computationally expensive, so need a tight limit + max_limit = config.get( + 'ckan.group_and_organization_list_all_fields_max', 25) + else: + max_limit = config.get('ckan.group_and_organization_list_max', 1000) + if limit is None or limit > max_limit: + limit = max_limit # order_by deprecated in ckan 1.8 # if it is supplied and sort isn't use order_by and raise a warning @@ -458,7 +461,7 @@ def group_list(context, data_dict): core fields are returned - get some more using the include_* options. Because ``all_fields`` is computationally expensive, using it will set the ``limit`` to a max of ``25`` or whatever the site has configured: - ``ckan.search.group_and_org_all_fields_max``. + ``ckan.group_and_organization_list_all_fields_max``. Returning a list of packages is too expensive, so the `packages` property for each group is deprecated, but there is a count of the packages in the `package_count` property. @@ -511,7 +514,7 @@ def organization_list(context, data_dict): core fields are returned - get some more using the include_* options. Because ``all_fields`` is computationally expensive, using it will set the ``limit`` to a max of ``25`` or whatever the site has configured: - ``ckan.search.group_and_org_all_fields_max``. + ``ckan.group_and_organization_list_all_fields_max``. Returning a list of packages is too expensive, so the `packages` property for each group is deprecated, but there is a count of the packages in the `package_count` property. diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 914396000c5..514b5cc8363 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -167,20 +167,32 @@ def _create_bulk_groups(self, name, count): from ckan import model model.repo.new_revision() groups = [model.Group(name='{}_{}'.format(name, i)) - for i in range(count)] + for i in range(count)] model.Session.add_all(groups) model.repo.commit_and_remove() - def test_no_default_limit(self): - self._create_bulk_groups('group_default', 30) + def test_limit_default(self): + self._create_bulk_groups('group_default', 1010) results = helpers.call_action('group_list') - eq(len(results), 30) # i.e. not limited + eq(len(results), 1000) # i.e. default value - def test_all_fields_limit(self): - self._create_bulk_groups('group_all_fields_default', 30) + @helpers.change_config('ckan.group_and_organization_list_max', '5') + def test_limit_configured(self): + self._create_bulk_groups('group_default', 7) + results = helpers.call_action('group_list') + eq(len(results), 5) # i.e. configured limit + + def test_all_fields_limit_default(self): + self._create_bulk_groups('org_all_fields_default', 30) results = helpers.call_action('group_list', all_fields=True) eq(len(results), 25) # i.e. default value + @helpers.change_config('ckan.group_and_organization_list_all_fields_max', '5') + def test_all_fields_limit_configured(self): + self._create_bulk_groups('org_all_fields_default', 30) + results = helpers.call_action('group_list', all_fields=True) + eq(len(results), 5) # i.e. configured limit + def test_group_list_extras_returned(self): group = factories.Group(extras=[{'key': 'key1', 'value': 'val1'}]) @@ -489,16 +501,27 @@ def _create_bulk_orgs(self, name, count): model.Session.add_all(orgs) model.repo.commit_and_remove() - def test_no_default_limit(self): - self._create_bulk_orgs('org_default', 30) + def test_limit_default(self): + self._create_bulk_orgs('org_default', 1010) + results = helpers.call_action('organization_list') + eq(len(results), 1000) # i.e. default value + + @helpers.change_config('ckan.group_and_organization_list_max', '5') + def test_limit_configured(self): + self._create_bulk_orgs('org_default', 7) results = helpers.call_action('organization_list') - eq(len(results), 30) # i.e. not limited + eq(len(results), 5) # i.e. configured limit - def test_all_fields_limit(self): + def test_all_fields_limit_default(self): self._create_bulk_orgs('org_all_fields_default', 30) results = helpers.call_action('organization_list', all_fields=True) eq(len(results), 25) # i.e. default value + @helpers.change_config('ckan.group_and_organization_list_all_fields_max', '5') + def test_all_fields_limit_configured(self): + self._create_bulk_orgs('org_all_fields_default', 30) + results = helpers.call_action('organization_list', all_fields=True) + eq(len(results), 5) # i.e. configured limit class TestOrganizationShow(helpers.FunctionalTestBase): diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index c0ffc9eaab3..6eb4aa0ddef 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -750,14 +750,30 @@ Maximum allowed value for rows returned. Specifically this limits: * ``package_search``'s ``rows`` parameter * ``group_show`` and ``organization_show``'s number of datasets returned when specifying ``include_datasets=true`` -.. _ckan.search.group_and_org_all_fields_max: +.. _ckan.group_and_organization_list_max: -ckan.search.group_and_org_all_fields_max -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +ckan.group_and_organization_list_max +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Example:: - ckan.search.group_and_org_all_fields_max = 100 + ckan.group_and_organization_list_max = 1000 + +Default value: ``1000`` + +Maximum number of groups/organizations returned when listing them. Specifically this limits: + +* ``group_list``'s ``limit`` when ``all_fields=false`` +* ``organization_list``'s ``limit`` when ``all_fields=false`` + +.. _ckan.group_and_organization_list_all_fields_max: + +ckan.group_and_organization_list_all_fields_max +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Example:: + + ckan.group_and_organization_list_all_fields_max = 100 Default value: ``25`` From 6d2819590a60e0bcc70d737c9bf591425e7f281f Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 28 Sep 2018 23:13:23 +0100 Subject: [PATCH 49/99] Document latest limit. --- ckan/logic/action/get.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index f6564130cfc..b408740a104 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -447,9 +447,11 @@ def group_list(context, data_dict): "name asc" string of field name and sort-order. The allowed fields are 'name', 'package_count' and 'title' :type sort: string - :param limit: if given, the list of groups will be broken into pages of - at most ``limit`` groups per page and only one page will be returned - at a time (optional) + :param limit: the maximum number of groups returned (optional) + Default: ``1000`` when all_fields=false unless set in site's + configuration ``ckan.group_and_organization_list_max`` + Default: ``25`` when all_fields=true unless set in site's + configuration ``ckan.group_and_organization_list_all_fields_max`` :type limit: int :param offset: when ``limit`` is given, the offset to start returning groups from @@ -459,9 +461,6 @@ def group_list(context, data_dict): :type groups: list of strings :param all_fields: return group dictionaries instead of just names. Only core fields are returned - get some more using the include_* options. - Because ``all_fields`` is computationally expensive, using it will set - the ``limit`` to a max of ``25`` or whatever the site has configured: - ``ckan.group_and_organization_list_all_fields_max``. Returning a list of packages is too expensive, so the `packages` property for each group is deprecated, but there is a count of the packages in the `package_count` property. @@ -499,9 +498,11 @@ def organization_list(context, data_dict): "name asc" string of field name and sort-order. The allowed fields are 'name', 'package_count' and 'title' :type sort: string - :param limit: if given, the list of organizations will be broken into pages - of at most ``limit`` organizations per page and only one page will be - returned at a time (optional) + :param limit: the maximum number of organizations returned (optional) + Default: ``1000`` when all_fields=false unless set in site's + configuration ``ckan.group_and_organization_list_max`` + Default: ``25`` when all_fields=true unless set in site's + configuration ``ckan.group_and_organization_list_all_fields_max`` :type limit: int :param offset: when ``limit`` is given, the offset to start returning organizations from @@ -512,9 +513,6 @@ def organization_list(context, data_dict): :type organizations: list of strings :param all_fields: return group dictionaries instead of just names. Only core fields are returned - get some more using the include_* options. - Because ``all_fields`` is computationally expensive, using it will set - the ``limit`` to a max of ``25`` or whatever the site has configured: - ``ckan.group_and_organization_list_all_fields_max``. Returning a list of packages is too expensive, so the `packages` property for each group is deprecated, but there is a count of the packages in the `package_count` property. From 5e58451bc418776ed8c9f80ddb6f92e5a64ee80f Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Wed, 3 Oct 2018 17:22:52 +0200 Subject: [PATCH 50/99] Some changes --- ckan/cli/__init__.py | 1 + ckan/cli/cli.py | 14 ++++++++------ ckan/cli/server.py | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/ckan/cli/__init__.py b/ckan/cli/__init__.py index 009b4ecd03c..13cb87fa35a 100644 --- a/ckan/cli/__init__.py +++ b/ckan/cli/__init__.py @@ -28,6 +28,7 @@ def load_config(config=None): + import pdb; pdb.set_trace() from paste.deploy import appconfig from paste.script.util.logging_config import fileConfig log.info("Searching for configuration file") diff --git a/ckan/cli/cli.py b/ckan/cli/cli.py index ef5dff8f550..d0c230bea17 100644 --- a/ckan/cli/cli.py +++ b/ckan/cli/cli.py @@ -4,7 +4,7 @@ import click -from ckan.cli import db, load_config, click_config_option, search_index, server +from ckan.cli import db, load_config, search_index, server from ckan.config.middleware import make_app import logging @@ -22,18 +22,20 @@ class CkanCommand(object): - def __init__(self, config=None): - self.config = load_config(config) + def __init__(self, conf=None): + self.config = load_config(conf) self.app = make_app(self.config.global_conf, **self.config.local_conf) @click.group() @click.help_option(u'-h', u'--help') -@click_config_option +@click.option(u'--conf', envvar=u'CKANINI', + type=click.File()) @click.pass_context -def ckan(ctx, config, *args, **kwargs): +def ckan(ctx, conf, *args, **kwargs): log.info(u'Loading configuration') - ctx.obj = CkanCommand(config) + import pdb; pdb.set_trace() + ctx.obj = CkanCommand(conf) ckan.add_command(server.run) diff --git a/ckan/cli/server.py b/ckan/cli/server.py index 99ad914ce92..566e42f8d3b 100644 --- a/ckan/cli/server.py +++ b/ckan/cli/server.py @@ -6,7 +6,7 @@ from flask import Flask, current_app from werkzeug.serving import run_simple -from ckan.cli import load_config, click_config_option +from ckan.cli import click_config_option @click.command(u'run', short_help=u'Start development server') @@ -16,7 +16,7 @@ @click.option(u'-p', u'--port', default=5000, help=u'Set port') @click.option(u'-r', u'--reloader', default=True, help=u'Use reloader') @click.pass_context -def run(ctx, config, host, port, reloader): +def run(ctx, host, port, reloader): u'''Runs development server''' # click.secho(u"Starting CKAN", fg='yellow') run_simple(host, port, ctx.obj.app, use_reloader=reloader, use_evalex=True) From 7760a56e94029d90462818f6b0f7fd1fb92b1c30 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 5 Oct 2018 13:10:39 +0100 Subject: [PATCH 51/99] Display "*about* 1000000 records" on the frontend when estimated --- ckanext/datastore/logic/action.py | 12 +++++++----- .../theme/public/vendor/ckan.js/ckan.js | 18 ++++++++++-------- .../theme/public/vendor/recline/recline.js | 9 ++++++++- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index cb2978a2552..07162fa1ac0 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -406,12 +406,14 @@ def datastore_search(context, data_dict): (optional, default: true) :type include_total: bool :param total_estimation_threshold: By default, the "total" returned is a - precise count of the query result, which can be computationally + precise count of the query results, which can be computationally expensive for large results (e.g. >100,000k rows). By setting this - value to non-zero, you can set a threshold estimated total, above - which it doesn't do the count - it just returns the estimated - total, generated by quick sampling. NB Total estimation is not - compatible with filter or distinct options. (optional, default: 0) + value to non-zero, and the estimated total is above this value, + then datastore_search will skip the precise count - it just returns + the estimated total. The estimated total is generated by EXPLAIN, + which is triggered by Express Loader or DataPusher as part of + loading the data. NB Total estimation is not compatible when + specifying 'filters' or 'distinct' options. (optional, default: 0) :type total_estimation_threshold: int :param records_format: the format for the records return value: 'objects' (default) list of {fieldname1: value1, ...} dicts, diff --git a/ckanext/reclineview/theme/public/vendor/ckan.js/ckan.js b/ckanext/reclineview/theme/public/vendor/ckan.js/ckan.js index 82a6472a5dc..184ec4b5248 100644 --- a/ckanext/reclineview/theme/public/vendor/ckan.js/ckan.js +++ b/ckanext/reclineview/theme/public/vendor/ckan.js/ckan.js @@ -10,7 +10,7 @@ if (isNodeModule) { } (function(my) { - my.Client = function(endpoint, apiKey) { + my.Client = function(endpoint, apiKey) { this.endpoint = _getEndpoint(endpoint); this.apiKey = apiKey; }; @@ -51,7 +51,8 @@ if (isNodeModule) { var out = { total: results.result.total, fields: fields, - hits: results.result.records + hits: results.result.records, + total_was_estimated: results.result.total_was_estimated }; cb(null, out); }); @@ -67,7 +68,7 @@ if (isNodeModule) { 'bool': 'boolean', }; - // + // my.jsonTableSchema2CkanTypes = { 'string': 'text', 'number': 'float', @@ -128,7 +129,7 @@ if (isNodeModule) { code: obj.status, message: obj.responseText } - cb(err); + cb(err); } if (options.headers) { options.beforeSend = function(req) { @@ -147,7 +148,8 @@ if (isNodeModule) { q: queryObj.q, filters: {}, limit: queryObj.size || 10, - offset: queryObj.from || 0 + offset: queryObj.from || 0, + total_estimation_threshold: 1000 }; if (queryObj.sort && queryObj.sort.length > 0) { @@ -188,7 +190,7 @@ if (isNodeModule) { // This provides connection to the CKAN DataStore (v2) // // General notes -// +// // We need 2 things to make most requests: // // 1. CKAN API endpoint @@ -196,13 +198,13 @@ if (isNodeModule) { // // There are 2 ways to specify this information. // -// EITHER (checked in order): +// EITHER (checked in order): // // * Every dataset must have an id equal to its resource id on the CKAN instance // * The dataset has an endpoint attribute pointing to the CKAN API endpoint // // OR: -// +// // Set the url attribute of the dataset to point to the Resource on the CKAN instance. The endpoint and id will then be automatically computed. var recline = recline || {}; recline.Backend = recline.Backend || {}; diff --git a/ckanext/reclineview/theme/public/vendor/recline/recline.js b/ckanext/reclineview/theme/public/vendor/recline/recline.js index 25caa0520c6..88853e8c0d7 100755 --- a/ckanext/reclineview/theme/public/vendor/recline/recline.js +++ b/ckanext/reclineview/theme/public/vendor/recline/recline.js @@ -422,6 +422,7 @@ my.Dataset = Backbone.Model.extend({ }; this.facets = new my.FacetList(); this.recordCount = null; + this.recordCountWasEstimated = null; this.queryState = new my.Query(); this.queryState.bind('change facet:add', function () { self.query(); // We want to call query() without any arguments. @@ -602,6 +603,7 @@ my.Dataset = Backbone.Model.extend({ _handleQueryResult: function(queryResult) { var self = this; self.recordCount = queryResult.total; + self.recordCountWasEstimated = queryResult.total_was_estimated; var docs = _.map(queryResult.hits, function(hit) { var _doc = new my.Record(hit); _doc.fields = self.fields; @@ -626,6 +628,7 @@ my.Dataset = Backbone.Model.extend({ toTemplateJSON: function() { var data = this.toJSON(); data.recordCount = this.recordCount; + data.recordCountWasEstimated = this.recordCountWasEstimated; data.fields = this.fields.toJSON(); return data; }, @@ -2619,7 +2622,10 @@ my.MultiView = Backbone.View.extend({ \ \
\ - {{recordCount}} records\ + {{#recordCountWasEstimated}} \ + about \ + {{/recordCountWasEstimated}} \ + {{recordCount}} records \
\