diff --git a/haystack/indexes.py b/haystack/indexes.py index 65802cf96..5851fe7f1 100644 --- a/haystack/indexes.py +++ b/haystack/indexes.py @@ -10,36 +10,38 @@ class DeclarativeMetaclass(type): def __new__(cls, name, bases, attrs): attrs['fields'] = {} - + # Inherit any fields from parent(s). try: parents = [b for b in bases if issubclass(b, SearchIndex)] - + # Simulate the MRO. + parents.reverse() + for p in parents: fields = getattr(p, 'fields', None) - + if fields: attrs['fields'].update(fields) except NameError: pass - + # Build a dictionary of faceted fields for cross-referencing. facet_fields = {} - + for field_name, obj in attrs.items(): # Only need to check the FacetFields. if hasattr(obj, 'facet_for'): if not obj.facet_for in facet_fields: facet_fields[obj.facet_for] = [] - + facet_fields[obj.facet_for].append(field_name) - + for field_name, obj in attrs.items(): if isinstance(obj, SearchField): field = attrs.pop(field_name) field.set_instance_name(field_name) attrs['fields'][field_name] = field - + # Only check non-faceted fields for the following info. if not hasattr(field, 'facet_for'): if field.faceted == True: @@ -50,83 +52,83 @@ def __new__(cls, name, bases, attrs): shadow_facet_field = field.facet_class(facet_for=field_name) shadow_facet_field.set_instance_name(shadow_facet_name) attrs['fields'][shadow_facet_name] = shadow_facet_field - + return super(DeclarativeMetaclass, cls).__new__(cls, name, bases, attrs) class SearchIndex(object): """ Base class for building indexes. - + An example might look like this:: - + import datetime from haystack.indexes import * from myapp.models import Note - + class NoteIndex(SearchIndex): text = CharField(document=True, use_template=True) author = CharField(model_attr='user') pub_date = DateTimeField(model_attr='pub_date') - + def index_queryset(self): return super(NoteIndex, self).index_queryset().filter(pub_date__lte=datetime.datetime.now()) - + """ __metaclass__ = DeclarativeMetaclass - + def __init__(self, model, backend=None): self.model = model - + if backend: self.backend = backend else: import haystack self.backend = haystack.backend.SearchBackend() - + self.prepared_data = None content_fields = [] - + for field_name, field in self.fields.items(): if field.document is True: content_fields.append(field_name) - + if not len(content_fields) == 1: raise SearchFieldError("An index must have one (and only one) SearchField with document=True.") - + def _setup_save(self, model): """A hook for controlling what happens when the registered model is saved.""" pass - + def _setup_delete(self, model): """A hook for controlling what happens when the registered model is deleted.""" pass - + def _teardown_save(self, model): """A hook for removing the behavior when the registered model is saved.""" pass - + def _teardown_delete(self, model): """A hook for removing the behavior when the registered model is deleted.""" pass - + def index_queryset(self): """ Get the default QuerySet to index when doing a full update. - + Subclasses can override this method to avoid indexing certain objects. """ return self.model._default_manager.all() - + def read_queryset(self): """ Get the default QuerySet for read actions. - + Subclasses can override this method to work with other managers. Useful when working with default managers that filter some objects. """ return self.index_queryset() - + def prepare(self, obj): """ Fetches and adds/alters data before indexing. @@ -136,46 +138,46 @@ def prepare(self, obj): DJANGO_CT: "%s.%s" % (obj._meta.app_label, obj._meta.module_name), DJANGO_ID: force_unicode(obj.pk), } - + for field_name, field in self.fields.items(): # Use the possibly overridden name, which will default to the # variable name of the field. self.prepared_data[field.index_fieldname] = field.prepare(obj) - + for field_name, field in self.fields.items(): if hasattr(self, "prepare_%s" % field_name): value = getattr(self, "prepare_%s" % field_name)(obj) self.prepared_data[field.index_fieldname] = value - + return self.prepared_data - + def full_prepare(self, obj): self.prepared_data = self.prepare(obj) - + # Duplicate data for faceted fields. for field_name, field in self.fields.items(): if getattr(field, 'facet_for', None): source_field_name = self.fields[field.facet_for].index_fieldname - + # If there's data there, leave it alone. Otherwise, populate it # with whatever the related field has. if self.prepared_data[field_name] is None and source_field_name in self.prepared_data: self.prepared_data[field.index_fieldname] = self.prepared_data[source_field_name] - + # Remove any fields that lack a value and are ``null=True``. for field_name, field in self.fields.items(): if field.null is True: if self.prepared_data[field.index_fieldname] is None: del(self.prepared_data[field.index_fieldname]) - + return self.prepared_data - + def get_content_field(self): """Returns the field that supplies the primary document to be indexed.""" for field_name, field in self.fields.items(): if field.document is True: return field.index_fieldname - + def get_field_weights(self): """Returns a dict of fields with weight values""" weights = {} @@ -183,11 +185,11 @@ def get_field_weights(self): if field.boost: weights[field_name] = field.boost return weights - + def update(self): """Update the entire index""" self.backend.update(self, self.index_queryset()) - + def update_object(self, instance, **kwargs): """ Update the index for a single object. Attached to the class's @@ -196,54 +198,54 @@ def update_object(self, instance, **kwargs): # Check to make sure we want to index this first. if self.should_update(instance, **kwargs): self.backend.update(self, [instance]) - + def remove_object(self, instance, **kwargs): """ - Remove an object from the index. Attached to the class's + Remove an object from the index. Attached to the class's post-delete hook. """ self.backend.remove(instance) - + def clear(self): """Clear the entire index.""" self.backend.clear(models=[self.model]) - + def reindex(self): """Completely clear the index for this model and rebuild it.""" self.clear() self.update() - + def get_updated_field(self): """ Get the field name that represents the updated date for the model. - + If specified, this is used by the reindex command to filter out results from the QuerySet, enabling you to reindex only recent records. This method should either return None (reindex everything always) or a string of the Model's DateField/DateTimeField name. """ return None - + def should_update(self, instance, **kwargs): """ Determine if an object should be updated in the index. - + It's useful to override this when an object may save frequently and cause excessive reindexing. You should check conditions on the instance and return False if it is not to be indexed. - + By default, returns True (always reindex). """ return True - + def load_all_queryset(self): """ Provides the ability to override how objects get loaded in conjunction with ``SearchQuerySet.load_all``. - + This is useful for post-processing the results from the query, enabling things like adding ``select_related`` or filtering certain data. - + By default, returns ``all()`` on the model's default manager. """ return self.model._default_manager.all() @@ -256,13 +258,13 @@ class RealTimeSearchIndex(SearchIndex): """ def _setup_save(self, model): signals.post_save.connect(self.update_object, sender=model) - + def _setup_delete(self, model): signals.post_delete.connect(self.remove_object, sender=model) - + def _teardown_save(self, model): signals.post_save.disconnect(self.update_object, sender=model) - + def _teardown_delete(self, model): signals.post_delete.disconnect(self.remove_object, sender=model) @@ -281,7 +283,7 @@ def index_field_from_django_field(f, default=CharField): Django type. """ result = default - + if f.get_internal_type() in ('DateField', 'DateTimeField'): result = DateTimeField elif f.get_internal_type() in ('BooleanField', 'NullBooleanField'): @@ -292,7 +294,7 @@ def index_field_from_django_field(f, default=CharField): result = FloatField elif f.get_internal_type() in ('IntegerField', 'PositiveIntegerField', 'PositiveSmallIntegerField', 'SmallIntegerField'): result = IntegerField - + return result @@ -300,52 +302,52 @@ class ModelSearchIndex(SearchIndex): """ Introspects the model assigned to it and generates a `SearchIndex` based on the fields of that model. - + In addition, it adds a `text` field that is the `document=True` field and has `use_template=True` option set, just like the `BasicSearchIndex`. - + Usage of this class might result in inferior `SearchIndex` objects, which can directly affect your search results. Use this to establish basic functionality and move to custom `SearchIndex` objects for better control. - + At this time, it does not handle related fields. """ text = CharField(document=True, use_template=True) # list of reserved field names fields_to_skip = (ID, DJANGO_CT, DJANGO_ID, 'content', 'text') - + def __init__(self, model, backend=None, extra_field_kwargs=None): self.model = model - + if backend: self.backend = backend else: import haystack self.backend = haystack.backend.SearchBackend() - + self.prepared_data = None content_fields = [] self.extra_field_kwargs = extra_field_kwargs or {} - + # Introspect the model, adding/removing fields as needed. # Adds/Excludes should happen only if the fields are not already # defined in `self.fields`. self._meta = getattr(self, 'Meta', None) - + if self._meta: fields = getattr(self._meta, 'fields', []) excludes = getattr(self._meta, 'excludes', []) - + # Add in the new fields. self.fields.update(self.get_fields(fields, excludes)) - + for field_name, field in self.fields.items(): if field.document is True: content_fields.append(field_name) - + if not len(content_fields) == 1: raise SearchFieldError("An index must have one (and only one) SearchField with document=True.") - + def should_skip_field(self, field): """ Given a Django model field, return if it should be included in the @@ -354,19 +356,19 @@ def should_skip_field(self, field): # Skip fields in skip list if field.name in self.fields_to_skip: return True - + # Ignore certain fields (AutoField, related fields). if field.primary_key or getattr(field, 'rel'): return True - + return False - + def get_index_fieldname(self, f): """ Given a Django field, return the appropriate index fieldname. """ return f.name - + def get_fields(self, fields=None, excludes=None): """ Given any explicit fields to include and fields to exclude, add @@ -375,37 +377,37 @@ def get_fields(self, fields=None, excludes=None): final_fields = {} fields = fields or [] excludes = excludes or [] - + for f in self.model._meta.fields: # If the field name is already present, skip if f.name in self.fields: continue - + # If field is not present in explicit field listing, skip if fields and f.name not in fields: continue - + # If field is in exclude list, skip if excludes and f.name in excludes: continue - + if self.should_skip_field(f): continue - + index_field_class = index_field_from_django_field(f) - + kwargs = copy.copy(self.extra_field_kwargs) kwargs.update({ 'model_attr': f.name, }) - + if f.null is True: kwargs['null'] = True - + if f.has_default(): kwargs['default'] = f.default - + final_fields[f.name] = index_field_class(**kwargs) final_fields[f.name].set_instance_name(self.get_index_fieldname(f)) - + return final_fields diff --git a/tests/core/tests/indexes.py b/tests/core/tests/indexes.py index 63087599a..85889c336 100644 --- a/tests/core/tests/indexes.py +++ b/tests/core/tests/indexes.py @@ -35,21 +35,21 @@ class GoodCustomMockSearchIndex(SearchIndex): pub_date = DateTimeField(model_attr='pub_date', faceted=True) extra = CharField(indexed=False, use_template=True) hello = CharField(model_attr='hello') - + def prepare(self, obj): super(GoodCustomMockSearchIndex, self).prepare(obj) self.prepared_data['whee'] = 'Custom preparation.' return self.prepared_data - + def prepare_author(self, obj): return "Hi, I'm %s" % self.prepared_data['author'] - + def load_all_queryset(self): return self.model._default_manager.filter(id__gt=1) - + def index_queryset(self): return MockModel.objects.all() - + def read_queryset(self): return MockModel.objects.filter(author__in=['daniel1', 'daniel3']) @@ -71,14 +71,26 @@ class GoodFacetedMockSearchIndex(SearchIndex): author_foo = FacetCharField(facet_for='author') pub_date = DateTimeField(model_attr='pub_date') pub_date_exact = FacetDateTimeField(facet_for='pub_date') - + def prepare_author(self, obj): return "Hi, I'm %s" % self.prepared_data['author'] - + def prepare_pub_date_exact(self, obj): return "2010-10-26T01:54:32" +class MROFieldsSearchIndexA(SearchIndex): + text = CharField(document=True, model_attr='test_a') + + +class MROFieldsSearchIndexB(SearchIndex): + text = CharField(document=True, model_attr='test_b') + + +class MROFieldsSearchChild(MROFieldsSearchIndexA, MROFieldsSearchIndexB): + pass + + class SearchIndexTestCase(TestCase): def setUp(self): super(SearchIndexTestCase, self).setUp() @@ -116,19 +128,19 @@ def setUp(self): 'id': u'core.mockmodel.3' } } - + def test_no_contentfield_present(self): self.assertRaises(SearchFieldError, BadSearchIndex1, MockModel, MockSearchBackend()) - + def test_too_many_contentfields_present(self): self.assertRaises(SearchFieldError, BadSearchIndex2, MockModel, MockSearchBackend()) - + def test_contentfield_present(self): try: mi = GoodMockSearchIndex(MockModel, backend=MockSearchBackend()) except: self.fail() - + def test_proper_fields(self): self.assertEqual(len(self.mi.fields), 4) self.assert_('content' in self.mi.fields) @@ -139,7 +151,7 @@ def test_proper_fields(self): self.assert_(isinstance(self.mi.fields['pub_date'], DateTimeField)) self.assert_('extra' in self.mi.fields) self.assert_(isinstance(self.mi.fields['extra'], CharField)) - + self.assertEqual(len(self.cmi.fields), 7) self.assert_('content' in self.cmi.fields) self.assert_(isinstance(self.cmi.fields['content'], CharField)) @@ -155,131 +167,131 @@ def test_proper_fields(self): self.assert_(isinstance(self.cmi.fields['extra'], CharField)) self.assert_('hello' in self.cmi.fields) self.assert_(isinstance(self.cmi.fields['extra'], CharField)) - + def test_index_queryset(self): self.assertEqual(len(self.cmi.index_queryset()), 3) - + def test_read_queryset(self): self.assertEqual(len(self.cmi.read_queryset()), 2) - + def test_prepare(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + self.assertEqual(len(self.mi.prepare(mock)), 7) self.assertEqual(sorted(self.mi.prepare(mock).keys()), ['author', 'content', 'django_ct', 'django_id', 'extra', 'id', 'pub_date']) - + def test_custom_prepare(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + self.assertEqual(len(self.cmi.prepare(mock)), 11) self.assertEqual(sorted(self.cmi.prepare(mock).keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'whee']) - + self.assertEqual(len(self.cmi.full_prepare(mock)), 11) self.assertEqual(sorted(self.cmi.full_prepare(mock).keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'whee']) - + def test_custom_prepare_author(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + self.assertEqual(len(self.cmi.prepare(mock)), 11) self.assertEqual(sorted(self.cmi.prepare(mock).keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'whee']) - + self.assertEqual(len(self.cmi.full_prepare(mock)), 11) self.assertEqual(sorted(self.cmi.full_prepare(mock).keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'whee']) self.assertEqual(self.cmi.prepared_data['author'], "Hi, I'm daniel20") self.assertEqual(self.cmi.prepared_data['author_exact'], "Hi, I'm daniel20") - + def test_custom_model_attr(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + self.assertEqual(len(self.cmi.prepare(mock)), 11) self.assertEqual(sorted(self.cmi.prepare(mock).keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'whee']) - + self.assertEqual(len(self.cmi.full_prepare(mock)), 11) self.assertEqual(sorted(self.cmi.full_prepare(mock).keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'whee']) self.assertEqual(self.cmi.prepared_data['hello'], u'World!') - + def test_custom_index_fieldname(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + cofnmi = GoodOverriddenFieldNameMockSearchIndex(MockModel, backend=self.msb) self.assertEqual(len(cofnmi.prepare(mock)), 6) self.assertEqual(sorted(cofnmi.prepare(mock).keys()), ['django_ct', 'django_id', 'hello', 'id', 'more_content', 'name_s']) self.assertEqual(cofnmi.prepared_data['name_s'], u'daniel20') self.assertEqual(cofnmi.get_content_field(), 'more_content') - + def test_get_content_field(self): self.assertEqual(self.mi.get_content_field(), 'content') - + def test_update(self): self.mi.update() self.assertEqual(self.msb.docs, self.sample_docs) self.msb.clear() - + def test_update_object(self): self.assertEqual(self.msb.docs, {}) - + mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + self.mi.update_object(mock) self.assertEqual(self.msb.docs, {'core.mockmodel.20': {'django_id': u'20', 'django_ct': u'core.mockmodel', 'author': u'daniel20', 'extra': u'Stored!\n20', 'content': u'Indexed!\n20', 'pub_date': datetime.datetime(2009, 1, 31, 4, 19), 'id': 'core.mockmodel.20'}}) self.msb.clear() - + def test_remove_object(self): self.msb.docs = {'core.mockmodel.20': 'Indexed!\n20'} - + mock = MockModel() mock.pk = 20 - + self.mi.remove_object(mock) self.assertEqual(self.msb.docs, {}) self.msb.clear() - + def test_clear(self): self.msb.docs = { 'core.mockmodel.1': 'Indexed!\n1', 'core.mockmodel.2': 'Indexed!\n2', 'core.mockmodel.20': 'Indexed!\n20', } - + self.mi.clear() self.assertEqual(self.msb.docs, {}) self.msb.clear() - + def test_reindex(self): self.msb.docs = { 'core.mockmodel.1': 'Indexed!\n1', 'core.mockmodel.2': 'Indexed!\n2', 'core.mockmodel.20': 'Indexed!\n20', } - + self.mi.reindex() self.assertEqual(self.msb.docs, self.sample_docs) self.msb.clear() - + def test_inheritance(self): try: agmi = AltGoodMockSearchIndex(MockModel, backend=self.msb) except: self.fail() - + self.assertEqual(len(agmi.fields), 5) self.assert_('content' in agmi.fields) self.assert_(isinstance(agmi.fields['content'], CharField)) @@ -291,34 +303,48 @@ def test_inheritance(self): self.assert_(isinstance(agmi.fields['extra'], CharField)) self.assert_('additional' in agmi.fields) self.assert_(isinstance(agmi.fields['additional'], CharField)) - + + def test_proper_field_resolution(self): + mrofsc = MROFieldsSearchChild(MockModel, backend=self.msb) + mock = MockModel() + mock.pk = 20 + mock.author = 'daniel%s' % mock.id + mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) + mock.test_a = 'This is A' + mock.test_b = 'This is B' + + self.assertEqual(len(mrofsc.fields), 1) + prepped_data = mrofsc.prepare(mock) + self.assertEqual(len(prepped_data), 4) + self.assertEqual(prepped_data['text'], 'This is A') + def test_load_all_queryset(self): self.assertEqual([obj.id for obj in self.cmi.load_all_queryset()], [2, 3]) - + def test_nullable(self): mock = MockModel() mock.pk = 20 mock.author = None mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + prepared_data = self.cnmi.prepare(mock) self.assertEqual(len(prepared_data), 6) self.assertEqual(sorted(prepared_data.keys()), ['author', 'author_exact', 'content', 'django_ct', 'django_id', 'id']) - + prepared_data = self.cnmi.full_prepare(mock) self.assertEqual(len(prepared_data), 4) self.assertEqual(sorted(prepared_data.keys()), ['content', 'django_ct', 'django_id', 'id']) - + def test_custom_facet_fields(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel' mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) - + prepared_data = self.gfmsi.prepare(mock) self.assertEqual(len(prepared_data), 8) self.assertEqual(sorted(prepared_data.keys()), ['author', 'author_foo', 'content', 'django_ct', 'django_id', 'id', 'pub_date', 'pub_date_exact']) - + prepared_data = self.gfmsi.full_prepare(mock) self.assertEqual(len(prepared_data), 8) self.assertEqual(sorted(prepared_data.keys()), ['author', 'author_foo', 'content', 'django_ct', 'django_id', 'id', 'pub_date', 'pub_date_exact']) @@ -343,10 +369,10 @@ class Meta: class FieldsWithOverrideModelSearchIndex(ModelSearchIndex): foo = IntegerField(model_attr='foo') - + class Meta: fields = ['author', 'foo'] - + def get_index_fieldname(self, f): if f.name == 'author': return 'author_bar' @@ -374,7 +400,7 @@ def setUp(self): self.emsi = ExcludesModelSearchIndex(MockModel, backend=self.msb) self.fwomsi = FieldsWithOverrideModelSearchIndex(MockModel, backend=self.msb) self.yabmsi = YetAnotherBasicModelSearchIndex(AThirdMockModel, backend=self.msb) - + def test_basic(self): self.assertEqual(len(self.bmsi.fields), 4) self.assert_('foo' in self.bmsi.fields) @@ -391,7 +417,7 @@ def test_basic(self): self.assert_(isinstance(self.bmsi.fields['text'], CharField)) self.assertEqual(self.bmsi.fields['text'].document, True) self.assertEqual(self.bmsi.fields['text'].use_template, True) - + def test_fields(self): self.assertEqual(len(self.fmsi.fields), 3) self.assert_('author' in self.fmsi.fields) @@ -400,14 +426,14 @@ def test_fields(self): self.assert_(isinstance(self.fmsi.fields['pub_date'], DateTimeField)) self.assert_('text' in self.fmsi.fields) self.assert_(isinstance(self.fmsi.fields['text'], CharField)) - + def test_excludes(self): self.assertEqual(len(self.emsi.fields), 2) self.assert_('pub_date' in self.emsi.fields) self.assert_(isinstance(self.emsi.fields['pub_date'], DateTimeField)) self.assert_('text' in self.emsi.fields) self.assert_(isinstance(self.emsi.fields['text'], CharField)) - + def test_fields_with_override(self): self.assertEqual(len(self.fwomsi.fields), 3) self.assert_('author' in self.fwomsi.fields) @@ -416,11 +442,11 @@ def test_fields_with_override(self): self.assert_(isinstance(self.fwomsi.fields['foo'], IntegerField)) self.assert_('text' in self.fwomsi.fields) self.assert_(isinstance(self.fwomsi.fields['text'], CharField)) - + def test_overriding_field_name_with_get_index_fieldname(self): self.assert_(self.fwomsi.fields['foo'].index_fieldname, 'foo') self.assert_(self.fwomsi.fields['author'].index_fieldname, 'author_bar') - + def test_float_integer_fields(self): self.assertEqual(len(self.yabmsi.fields), 5) self.assertEqual(self.yabmsi.fields.keys(), ['average_delay', 'text', 'author', 'pub_date', 'view_count'])