Skip to content

Commit

Permalink
Merge branch 'dump' into 4561-limit-datastore_search
Browse files Browse the repository at this point in the history
  • Loading branch information
David Read committed Dec 7, 2018
2 parents 613ba51 + 1e2623c commit 54b56ba
Show file tree
Hide file tree
Showing 35 changed files with 989 additions and 132 deletions.
52 changes: 52 additions & 0 deletions ckan/cli/__init__.py
@@ -0,0 +1,52 @@
# encoding: utf-8

import os

import click
import logging
from logging.config import fileConfig as loggingFileConfig

log = logging.getLogger(__name__)


def error_shout(exception):
    u'''Print ``exception`` as red text on stderr.

    :param exception: any object; it is converted with ``str()`` before
        being printed
    '''
    message = str(exception)
    click.secho(message, err=True, fg=u'red')


# Reusable ``-c/--config`` click option. It is built once here so command
# groups can apply it as a decorator; the value defaults to None, which
# leaves the choice of file to load_config().
click_config_option = click.option(
    u'-c', u'--config',
    metavar=u'CONFIG',
    default=None,
    help=u'Config file to use (default: development.ini)',
)


def load_config(config=None):
    u'''Locate the CKAN .ini file, configure logging from it and load it.

    The file is looked up in this order:

    1. the ``config`` argument (the ``-c/--config`` option),
    2. the ``CKAN_INI`` environment variable,
    3. ``development.ini`` in the current working directory.

    :param config: path to the config file, or None to fall back to the
        environment variable / default file
    :returns: whatever ``paste.deploy.appconfig`` returns for the file
    :raises SystemExit: with an explanatory message when the config file
        cannot be found
    '''
    # ``exit`` is injected by the ``site`` module for interactive use and is
    # not guaranteed to exist; ``sys.exit`` always is.
    import sys

    env_config = os.environ.get(u'CKAN_INI')
    if config:
        filename = os.path.abspath(config)
        config_source = u'-c parameter'
    elif env_config:
        # Make the path absolute, exactly like the -c branch: the filename
        # is later embedded in a ``config:`` URI with no ``relative_to``
        # hint, so a relative path could not be resolved by paste.deploy.
        filename = os.path.abspath(env_config)
        config_source = u'$CKAN_INI'
    else:
        default_filename = u'development.ini'
        filename = os.path.join(os.getcwd(), default_filename)
        # Bound here too so the generic check below can never hit an
        # unbound name (e.g. if the file vanishes between the two checks).
        config_source = u'default value'
        if not os.path.exists(filename):
            # give really clear error message for this common situation
            msg = (
                u'ERROR: You need to specify the CKAN config (.ini) '
                u'file path.'
                u'\nUse the --config parameter or set environment '
                u'variable CKAN_INI or have {}\nin the current directory.'
            ).format(default_filename)
            sys.exit(msg)

    if not os.path.exists(filename):
        msg = u'Config file not found: %s' % filename
        msg += u'\n(Given by: %s)' % config_source
        sys.exit(msg)

    # Imported only once a usable file is known, so a missing config is
    # reported with the friendly messages above before paste is needed.
    from paste.deploy import appconfig

    loggingFileConfig(filename)
    log.info(u'Using configuration file %s', filename)
    return appconfig(u'config:' + filename)
30 changes: 30 additions & 0 deletions ckan/cli/cli.py
@@ -0,0 +1,30 @@
# encoding: utf-8

import logging

import click

from ckan.cli import click_config_option, db, load_config, search_index, server
from ckan.config.middleware import make_app

log = logging.getLogger(__name__)


class CkanCommand(object):
    u'''Holds the loaded configuration and the application built from it.

    ``config`` is the result of ``load_config(conf)``; ``app`` is the
    result of ``make_app`` called with that config's ``global_conf`` and
    ``local_conf``.
    '''

    def __init__(self, conf=None):
        loaded = load_config(conf)
        self.config = loaded
        self.app = make_app(loaded.global_conf, **loaded.local_conf)


@click.group()
@click.help_option(u'-h', u'--help')
@click_config_option
@click.pass_context
def ckan(ctx, config, *args, **kwargs):
    # Root command group. Builds one CkanCommand from the -c/--config value
    # and stores it on the click context as ``ctx.obj``.
    # (Kept as a comment rather than a docstring so the generated --help
    # output is unchanged.)
    command_state = CkanCommand(config)
    ctx.obj = command_state


# Register every top-level subcommand on the root ``ckan`` group.
for _subcommand in (server.run, db.db, search_index.search_index):
    ckan.add_command(_subcommand)
72 changes: 72 additions & 0 deletions ckan/cli/db.py
@@ -0,0 +1,72 @@
# encoding: utf-8

import logging

import click

from ckan.cli import error_shout

log = logging.getLogger(__name__)


@click.group(name=u'db', short_help=u'Database commands')
@click.help_option(u'-h', u'--help')
def db():
    # Container group for the database subcommands registered below with
    # @db.command. The explicit help option adds the ``-h`` alias so this
    # group accepts the same help flags as the other command groups.
    # (Comment instead of docstring: a docstring would alter --help output.)
    pass


@db.command(u'init', short_help=u'Initialize the database')
def initdb():
    u'''Initialising the database'''
    log.info(u"Initialize the Database")
    try:
        # The model import lives inside the try block so that an import
        # failure is reported through error_shout like any other error.
        import ckan.model as model
        model.repo.init_db()
    except Exception as err:
        error_shout(err)
        return
    # Only reached when init_db() finished without raising.
    click.secho(u'Initialising DB: SUCCESS', bold=True, fg=u'green')


# Confirmation prompt text for destructive database commands.
PROMPT_MSG = (
    u'This will delete all of your data!\n'
    u'Do you want to continue?'
)


@db.command(u'clean', short_help=u'Clean the database')
@click.confirmation_option(prompt=PROMPT_MSG)
def cleandb():
    u'''Cleaning the database'''
    try:
        # Import kept inside the try block so import errors are shouted
        # in the same way as failures of clean_db() itself.
        import ckan.model as model
        model.repo.clean_db()
    except Exception as err:
        error_shout(err)
        return
    # Only reached when clean_db() finished without raising.
    click.secho(u'Cleaning DB: SUCCESS', bold=True, fg=u'green')


@db.command(u'upgrade', short_help=u'Upgrade the database')
@click.option(u'-v', u'--version', help=u'Migration version')
def updatedb(version=None):
    u'''Upgrading the database'''
    try:
        # Import kept inside the try block so import errors are shouted
        # in the same way as failures of upgrade_db() itself.
        import ckan.model as model
        # ``version`` is passed through untouched (None when -v is omitted).
        model.repo.upgrade_db(version)
    except Exception as err:
        error_shout(err)
        return
    # Only reached when upgrade_db() finished without raising.
    click.secho(u'Upgrading DB: SUCCESS', bold=True, fg=u'green')


@db.command(u'version', short_help=u'Returns current version of data schema')
def version():
    u'''Return current version'''
    log.info(u"Returning current DB version")
    try:
        from ckan.model import Session
        query = u'select version from migrate_version;'
        rows = Session.execute(query).fetchall()
        # First column of the first row; an empty result raises here and
        # is reported by the handler below.
        current = rows[0][0]
        message = u"Latest data schema version: {0}".format(current)
        click.secho(message, bold=True)
    except Exception as err:
        error_shout(err)
112 changes: 112 additions & 0 deletions ckan/cli/search_index.py
@@ -0,0 +1,112 @@
# encoding: utf-8

import multiprocessing as mp

import click
import sqlalchemy as sa

from ckan.cli import error_shout


@click.group(short_help=u'Search index commands', name=u'search-index')
@click.help_option(u'-h', u'--help')
def search_index():
    # Container group only: the actual work is done by the subcommands
    # attached below with @search_index.command.
    return None


@search_index.command(name=u'rebuild', short_help=u'Rebuild search index')
@click.option(u'-v', u'--verbose', is_flag=True)
@click.option(u'-i', u'--force', is_flag=True,
              help=u'Ignore exceptions when rebuilding the index')
@click.option(u'-r', u'--refresh', help=u'Refresh current index', is_flag=True)
@click.option(u'-o', u'--only-missing',
              help=u'Index non indexed datasets only', is_flag=True)
@click.option(u'-q', u'--quiet', help=u'Do not output index rebuild progress',
              is_flag=True)
# Each help fragment ends with a space: adjacent literals are joined as-is,
# so without it the words run together ("Thisensures", "Defaultis false.").
@click.option(u'-e', u'--commit-each', is_flag=True,
              help=u'Perform a commit after indexing each dataset. This '
                   u'ensures that changes are immediately available on the '
                   u'search, but slows significantly the process. Default '
                   u'is false.')
@click.pass_context
def rebuild(ctx, verbose, force, refresh, only_missing, quiet, commit_each):
    u''' Rebuild search index '''
    # ``verbose`` is accepted for command-line compatibility but is not
    # used by this function.
    flask_app = ctx.obj.app.apps['flask_app']._wsgi_app
    # Aliased so the imported function does not shadow this command's name.
    from ckan.lib.search import rebuild as rebuild_index, commit
    try:
        # The rebuild runs inside a Flask test request context.
        with flask_app.test_request_context():
            rebuild_index(only_missing=only_missing,
                          force=force,
                          refresh=refresh,
                          defer_commit=(not commit_each),
                          quiet=quiet)
    except Exception as e:
        error_shout(e)
    # When commits were deferred, do a single commit at the end. This runs
    # whether or not the rebuild above raised.
    if not commit_each:
        commit()


@search_index.command(name=u'check', short_help=u'Check search index')
def check():
    # Thin wrapper: delegates to ckan.lib.search.check(). The import is
    # aliased so it does not shadow this command's own name.
    from ckan.lib.search import check as run_check
    run_check()


@search_index.command(name=u'show', short_help=u'Show index of a dataset')
@click.argument(u'dataset_name')
def show(dataset_name):
    # Looks up ``dataset_name`` with ckan.lib.search.show() and echoes
    # whatever that call returns.
    from ckan.lib.search import show as show_index
    click.echo(show_index(dataset_name))


@search_index.command(name=u'clear', short_help=u'Clear the search index')
@click.argument(u'dataset_name', required=False)
def clear(dataset_name):
    # With no dataset name the whole index is cleared; otherwise only the
    # named dataset is.
    from ckan.lib.search import clear as clear_one, clear_all

    if not dataset_name:
        clear_all()
        return
    clear_one(dataset_name)


def _chunks(items, n):
    u'''Yield at most ``n`` consecutive, non-empty slices covering ``items``.

    The first ``n - 1`` slices hold ``len(items) // n`` elements each (at
    least one) and the last slice takes the remainder. Empty slices are
    never yielded, so fewer than ``n`` slices are produced when there are
    fewer than ``n`` items, and none at all for an empty list.
    '''
    size = max(1, len(items) // n)
    for i in range(n - 1):
        chunk = items[i * size:(i + 1) * size]
        if chunk:
            yield chunk
    tail = items[(n - 1) * size:]
    if tail:
        yield tail


@search_index.command(name=u'rebuild-fast',
                      short_help=u'Reindex with multiprocessing')
@click.pass_context
def rebuild_fast(ctx):
    # Reindexes all active packages, splitting the ids over one worker
    # process per CPU. (Comment instead of docstring so the generated
    # --help output is unchanged.)
    conf = ctx.obj.config
    flask_app = ctx.obj.app.apps['flask_app']._wsgi_app
    db_url = conf['sqlalchemy.url']
    engine = sa.create_engine(db_url)
    result = engine.execute(u"select id from package where state = 'active';")
    package_ids = [row[0] for row in result]

    def start(ids):
        # Worker body: index the given ids, then commit.
        from ckan.lib.search import rebuild, commit
        rebuild(package_ids=ids)
        commit()

    processes = []
    with flask_app.test_request_context():
        try:
            # _chunks never yields an empty list, so no worker is started
            # with nothing to index (NOTE(review): an empty ``package_ids``
            # is presumably not a no-op for rebuild() - confirm).
            for chunk in _chunks(package_ids, mp.cpu_count()):
                process = mp.Process(target=start, args=(chunk,))
                processes.append(process)
                process.daemon = True
                process.start()

            for process in processes:
                process.join()
        except Exception as e:
            # Same reporting helper as the other commands; ``e.message``
            # is not available on every exception.
            error_shout(e)
19 changes: 19 additions & 0 deletions ckan/cli/server.py
@@ -0,0 +1,19 @@
# encoding: utf-8

import logging

import click
from werkzeug.serving import run_simple

log = logging.getLogger(__name__)


@click.command(u'run', short_help=u'Start development server')
@click.option(u'-H', u'--host', default=u'localhost', help=u'Set host')
@click.option(u'-p', u'--port', default=5000, help=u'Set port')
@click.option(u'-r', u'--reloader', default=True, help=u'Use reloader')
@click.pass_context
def run(ctx, host, port, reloader):
    u'''Runs development server'''
    startup_message = u"Running server {0} on port {1}".format(host, port)
    log.info(startup_message)
    # The application comes from the object stored on the click context.
    application = ctx.obj.app
    # NOTE(review): use_evalex=True is always passed; per werkzeug this
    # relates to the interactive debugger - confirm it is dev-only.
    run_simple(host, port, application,
               use_reloader=reloader, use_evalex=True)
15 changes: 15 additions & 0 deletions ckan/config/middleware/flask_app.py
Expand Up @@ -2,6 +2,7 @@

import os
import re
import time
import inspect
import itertools
import pkgutil
Expand Down Expand Up @@ -107,6 +108,13 @@ def make_flask_stack(conf, **app_conf):
app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
DebugToolbarExtension(app)

from werkzeug.debug import DebuggedApplication
app = DebuggedApplication(app, True)
app = app.app

log = logging.getLogger('werkzeug')
log.setLevel(logging.DEBUG)

# Use Beaker as the Flask session interface
class BeakerSessionInterface(SessionInterface):
def open_session(self, app, request):
Expand Down Expand Up @@ -298,6 +306,8 @@ def ckan_before_request():
# with extensions
set_controller_and_action()

g.__timer = time.time()


def ckan_after_request(response):
u'''Common handler executed after all Flask requests'''
Expand All @@ -311,6 +321,11 @@ def ckan_after_request(response):
# Set CORS headers if necessary
response = set_cors_headers_for_response(response)

r_time = time.time() - g.__timer
url = request.environ['CKAN_CURRENT_URL'].split('?')[0]

log.info(' %s render time %.3f seconds' % (url, r_time))

return response


Expand Down
18 changes: 17 additions & 1 deletion ckan/config/solr/schema.xml
Expand Up @@ -24,7 +24,7 @@
<!-- We update the version when there is a backward-incompatible change to this
schema. In this case the version should be set to the next CKAN version number.
(x.y but not x.y.z since it needs to be a float) -->
<schema name="ckan" version="2.8">
<schema name="ckan" version="2.9">

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
Expand Down Expand Up @@ -81,6 +81,18 @@ schema. In this case the version should be set to the next CKAN version number.
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<fieldType name="text_ngram" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.NGramTokenizerFactory" minGramSize="2" maxGramSize="10"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

</types>


Expand All @@ -89,10 +101,12 @@ schema. In this case the version should be set to the next CKAN version number.
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="title" type="text" indexed="true" stored="true" />
<field name="title_ngram" type="text_ngram" indexed="true" stored="true" />
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="dataset_type" type="string" indexed="true" stored="true" />
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name_ngram" type="text_ngram" indexed="true" stored="true" />
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
Expand Down Expand Up @@ -165,6 +179,8 @@ schema. In this case the version should be set to the next CKAN version number.
<solrQueryParser defaultOperator="AND"/>

<copyField source="url" dest="urls"/>
<copyField source="title" dest="title_ngram"/>
<copyField source="name" dest="name_ngram"/>
<copyField source="ckan_url" dest="urls"/>
<copyField source="download_url" dest="urls"/>
<copyField source="res_url" dest="urls"/>
Expand Down

0 comments on commit 54b56ba

Please sign in to comment.