-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Solr6 template upgrade #1504
Solr6 template upgrade #1504
Changes from 29 commits
2955843
a8d79ff
c4f73d6
eaaf3a2
d664828
085fc91
493085c
cfae3ed
17fb620
9a375d4
3389712
59bf22d
ac52128
f384e74
4197a9f
5a51fa0
544378a
0eae986
db19db6
045fa19
0a6613d
d4e7a5b
b356097
f3a30f1
24ee453
19fbf00
30058af
49388f2
042d6c3
9f8c583
3b94141
ed228fb
c4e30e1
3d5583c
898a51b
0509703
4ccc96a
65c592c
f8bad8f
1d810db
bd877a5
46fb6c2
6dca6ac
477c677
168c9ae
6cf4f26
245b946
6501ce6
a46a515
01dc17b
47a11b7
0e41ff5
03209da
5608db1
038dd5f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,8 @@ def __init__(self, connection_alias, **connection_options): | |
if 'URL' not in connection_options: | ||
raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias) | ||
|
||
self.collate = connection_options.get('COLLATE_SPELLING', True) | ||
|
||
self.conn = Solr(connection_options['URL'], timeout=self.timeout, | ||
**connection_options.get('KWARGS', {})) | ||
self.log = logging.getLogger('haystack') | ||
|
@@ -151,7 +153,7 @@ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_of | |
narrow_queries=None, spelling_query=None, | ||
within=None, dwithin=None, distance_point=None, | ||
models=None, limit_to_registered_models=None, | ||
result_class=None, stats=None, | ||
result_class=None, stats=None, collate=None, | ||
**extra_kwargs): | ||
kwargs = {'fl': '* score'} | ||
|
||
|
@@ -201,9 +203,11 @@ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_of | |
for key in highlight.keys() | ||
}) | ||
|
||
if collate is None: | ||
collate = self.collate | ||
if self.include_spelling is True: | ||
kwargs['spellcheck'] = 'true' | ||
kwargs['spellcheck.collate'] = 'true' | ||
kwargs['spellcheck.collate'] = str(collate).lower() | ||
kwargs['spellcheck.count'] = 1 | ||
|
||
if spelling_query: | ||
|
@@ -389,13 +393,38 @@ def _process_results(self, raw_results, highlight=False, result_class=None, dist | |
facets[key][facet_field][1::2])) | ||
|
||
if self.include_spelling and hasattr(raw_results, 'spellcheck'): | ||
# Solr 5+ changed the JSON response format so the suggestions will be key-value mapped rather | ||
# than simply paired elements in a list, which is a nice improvement but incompatible with | ||
# Solr 4: https://issues.apache.org/jira/browse/SOLR-3029 | ||
if len(raw_results.spellcheck.get('collations', [])): | ||
spelling_suggestion = raw_results.spellcheck['collations'][-1] | ||
elif len(raw_results.spellcheck.get('suggestions', [])): | ||
spelling_suggestion = raw_results.spellcheck['suggestions'][-1] | ||
# There are many different formats for Legacy, 6.4, and 6.5 | ||
# e.g. https://issues.apache.org/jira/browse/SOLR-3029 | ||
collations = raw_results.spellcheck.get('collations', []) | ||
suggestions = raw_results.spellcheck.get('suggestions', []) | ||
if len(collations): | ||
#Handle sol6.5 collation format | ||
if isinstance(collations, dict): | ||
spelling_suggestions= [col['collationQuery'] for col in collations.values()] #aggregate for future use in multi suggestion response | ||
#Legacy Legacy & 6.4 handling | ||
else: | ||
if isinstance(collations[1], dict): #Solr6.4 | ||
spelling_suggestions = [item["collationQuery"] for item in collations if isinstance(item,dict)] #aggregate for future use in multi suggestion response | ||
else: #Legacy Solr format | ||
spelling_suggestions=collations[-1] | ||
|
||
spelling_suggestion = spelling_suggestions[-1] #Keep current method of returning single value | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason why we would want the last suggestion rather than the first? I'm thinking we should have a comment with the rationale. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is just the way it was already done. I have no intuition as to why the last was taken. I believe Solr CAN do some sorting of the returns based on some priority, but I just wanted to keep the existing convention. |
||
elif len(suggestions): | ||
#Handle sol6.5 suggestion format | ||
if isinstance(suggestions, dict): | ||
for word,sug in suggestions.items(): | ||
spelling_suggestions = [item["word"] for item in sug['suggestion']] #aggregate for future use in multi suggestion response | ||
#Legacy Legacy & 6.4 handling | ||
else: | ||
spelling_suggestions = [] | ||
if isinstance(suggestions[1], dict): #Solr6.4 | ||
for item in suggestions: | ||
if isinstance(item, dict): | ||
spelling_suggestions += [subitem["word"] for subitem in item['suggestion']] | ||
else: #Legacy Solr | ||
spelling_suggestions=suggestions[-1] | ||
|
||
spelling_suggestion = spelling_suggestions[-1] #Keep current method of returning single value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we have this code in a few places, what do you think about declaring |
||
|
||
assert spelling_suggestion is None or isinstance(spelling_suggestion, six.string_types) | ||
|
||
|
@@ -722,6 +751,7 @@ def run(self, spelling_query=None, **kwargs): | |
search_kwargs.update(kwargs) | ||
|
||
results = self.backend.search(final_query, **search_kwargs) | ||
|
||
self._results = results.get('results', []) | ||
self._hit_count = results.get('hits', 0) | ||
self._facet_counts = self.post_process_facets(results) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
ID = getattr(settings, 'HAYSTACK_ID_FIELD', 'id') | ||
DJANGO_CT = getattr(settings, 'HAYSTACK_DJANGO_CT_FIELD', 'django_ct') | ||
DJANGO_ID = getattr(settings, 'HAYSTACK_DJANGO_ID_FIELD', 'django_id') | ||
HAYSTACK_DOCUMENT_FIELD = getattr(settings, 'HAYSTACK_DOCUMENT_FIELD', 'text') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like this could be |
||
|
||
# Default operator. Valid options are AND/OR. | ||
DEFAULT_OPERATOR = getattr(settings, 'HAYSTACK_DEFAULT_OPERATOR', 'AND') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,15 +3,21 @@ | |
from __future__ import absolute_import, division, print_function, unicode_literals | ||
|
||
from django.core.exceptions import ImproperlyConfigured | ||
from django.core.management.base import BaseCommand | ||
from django.core.management.base import BaseCommand,CommandError | ||
from django.template import Context, loader | ||
from django.conf import settings | ||
|
||
from haystack import connections, connection_router, constants | ||
from haystack.backends.solr_backend import SolrSearchBackend | ||
|
||
|
||
import pysolr | ||
import os | ||
import traceback | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I use |
||
import requests | ||
class Command(BaseCommand): | ||
help = "Generates a Solr schema that reflects the indexes." | ||
help = "Generates a Solr schema that reflects the indexes using templates under a django template dir 'search_configuration/*.xml'" | ||
schema_template_loc = 'search_configuration/schema.xml' | ||
solrcfg_template_loc = 'search_configuration/solrconfig.xml' | ||
|
||
def add_arguments(self, parser): | ||
parser.add_argument( | ||
|
@@ -22,15 +28,66 @@ def add_arguments(self, parser): | |
"-u", "--using", default=constants.DEFAULT_ALIAS, | ||
help='If provided, chooses a connection to work with.' | ||
) | ||
parser.add_argument( | ||
"-c", "--configure_dir", | ||
help='If provided, attempts to configure a core located in the given directory by removing the managed-schema.xml(renaming), configuring the core to use a classic (non-dynamic) schema, and generating the schema.xml from the template provided in' | ||
) | ||
parser.add_argument( | ||
"-r", "--reload", | ||
help='If provided, attempts to automatically reload the solr core' | ||
) | ||
|
||
|
||
def handle(self, **options): | ||
"""Generates a Solr schema that reflects the indexes.""" | ||
using = options.get('using') | ||
schema_xml = self.build_template(using=using) | ||
if not isinstance(connections[using].get_backend(), SolrSearchBackend): | ||
raise ImproperlyConfigured("'%s' isn't configured as a SolrEngine)." % connections[using].get_backend().connection_alias) | ||
|
||
schema_xml = self.build_template(using=using,tfile=Command.schema_template_loc) | ||
solrcfg_xml = self.build_template(using=using,tfile=Command.solrcfg_template_loc) | ||
|
||
if options.get('filename'): | ||
self.stdout.write("Trying to write schema file located at {}".format(options.get('filename'))) | ||
self.write_file(options.get('filename'), schema_xml) | ||
else: | ||
if options.get('reload'): | ||
connections[using].get_backend().reload() | ||
|
||
if options.get('configure_dir'): | ||
cdir = options.get('configure_dir') | ||
self.stdout.write("Trying to configure core located at {}".format(cdir)) | ||
if os.path.isfile(cdir+'/managed-schema'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should use |
||
try: | ||
os.rename(cdir+'/managed-schema',cdir+'/managed-schema.old') | ||
except: | ||
raise CommandError('Could not rename managed schema out of the way: {}'.format(cdir+'/managed-schema')) | ||
try: | ||
self.write_file(cdir+'/schema.xml', schema_xml) | ||
except: | ||
raise CommandError('Could not configure {}: {}'.format(cdir+'/schema.xml',traceback.format_exc())) | ||
|
||
try: | ||
self.write_file(cdir+'/solrconfig.xml',solrcfg_xml) | ||
except: | ||
raise CommandError('Could not configure core to use classic Schema Factory {}'.format(cdir+'/solrconfig.xml')) | ||
|
||
if options.get('reload'): | ||
core= settings.HAYSTACK_CONNECTIONS['solr']['URL'].rsplit('/',1)[-1] | ||
if 'ADMIN_URL' not in settings.HAYSTACK_CONNECTIONS['solr']: | ||
raise ImproperlyConfigured("'ADMIN_URL' must be specifid in the HAYSTACK_CONNECTIONS settins for the backend." ) | ||
if 'URL' not in settings.HAYSTACK_CONNECTIONS['solr']: | ||
raise ImproperlyConfigured("'URL' to the core must be specifid in the HAYSTACK_CONNECTIONS settins for the backend.") | ||
try: | ||
self.stdout.write("Trying to relaod core named {}".format(core)) | ||
resp = requests.get(settings.HAYSTACK_CONNECTIONS['solr']['ADMIN_URL'],params="action=RELOAD&core="+core).text#TODO: Fix when pysolr passes params as request params instead of data | ||
if resp.find('SolrException')!=-1: | ||
raise CommandError('Solr Exception Thrown -- Failed to reload core: {}'.format(resp)) | ||
except CommandError: | ||
raise | ||
except: | ||
raise CommandError('Failed to reload core: {}'.format(traceback.format_exc())) | ||
|
||
if options.get('filename') is None and options.get('configure_dir') is None and options.get('reload') is None: | ||
self.print_stdout(schema_xml) | ||
|
||
def build_context(self, using): | ||
|
@@ -42,17 +99,17 @@ def build_context(self, using): | |
content_field_name, fields = backend.build_schema( | ||
connections[using].get_unified_index().all_searchfields() | ||
) | ||
return Context({ | ||
return { | ||
'content_field_name': content_field_name, | ||
'fields': fields, | ||
'default_operator': constants.DEFAULT_OPERATOR, | ||
'ID': constants.ID, | ||
'DJANGO_CT': constants.DJANGO_CT, | ||
'DJANGO_ID': constants.DJANGO_ID, | ||
}) | ||
} | ||
|
||
def build_template(self, using): | ||
t = loader.get_template('search_configuration/solr.xml') | ||
def build_template(self, using, tfile=schema_template_loc): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this is a keyword argument it should be something like |
||
t = loader.get_template(tfile) | ||
c = self.build_context(using=using) | ||
return t.render(c) | ||
|
||
|
@@ -68,3 +125,4 @@ def print_stdout(self, schema_xml): | |
def write_file(self, filename, schema_xml): | ||
with open(filename, 'w') as schema_file: | ||
schema_file.write(schema_xml) | ||
os.fsync(schema_file.fileno()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor nitpick, but we should probably have a space after `#` in the comments in this block.