Skip to content

Commit

Permalink
[search] Index creation and modification date
Browse files Browse the repository at this point in the history
As the SOLR schema needs to be modified for the new release, we
might as well introduce this small change, which will allow us to address
the long-overdue ticket #191 (Search by modification date).
  • Loading branch information
amercader committed Dec 5, 2011
1 parent 5814170 commit 2ed1fd9
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 7 deletions.
1 change: 1 addition & 0 deletions ckan/config/solr/CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ v1.3 - (ckan>=1.5.2)
--------------------
* Use the index_id (hash of dataset id + site_id) as uniqueKey (#1430)
* Store extras (#1455)
* Store dataset creation and modification date (#191)

v1.2 - (ckan<=1.5.1)
--------------------
Expand Down
11 changes: 7 additions & 4 deletions ckan/config/solr/schema-1.3.xml
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,15 @@


<fields>
<field name="index_id" type="string" indexed="true" stored="true" required="true" />
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="index_id" type="string" indexed="true" stored="true" required="true" />
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="title" type="text" indexed="true" stored="true" />
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="ckan_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="download_url" type="string" indexed="true" stored="true" omitNorms="true" />
Expand Down Expand Up @@ -133,6 +133,9 @@
<field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>

<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>

<field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>

<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
Expand Down
8 changes: 7 additions & 1 deletion ckan/lib/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,13 @@ def index_package(self, pkg_dict):

pkg_dict[TYPE_FIELD] = PACKAGE_TYPE
pkg_dict = dict([(k.encode('ascii', 'ignore'), v) for (k, v) in pkg_dict.items()])


# modify dates (SOLR is quite picky with dates, and only accepts ISO dates
# with UTC time, i.e. a trailing Z)
# See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
pkg_dict['metadata_created'] += 'Z'
pkg_dict['metadata_modified'] += 'Z'

# mark this CKAN instance as data source:
pkg_dict['site_id'] = config.get('ckan.site_id')

Expand Down
16 changes: 14 additions & 2 deletions ckan/tests/lib/test_solr_search_index.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
import hashlib
import socket
import solr
Expand Down Expand Up @@ -53,16 +54,25 @@ def _get_index_id(self,pkg_id):
return hashlib.md5('%s%s' % (pkg_id,config['ckan.site_id'])).hexdigest()

def test_index(self):

datetime_now = datetime.now()
pkg_dict = {
'id': u'penguin-id',
'title': u'penguin',
'state': u'active'
'state': u'active',
'metadata_created': datetime_now.isoformat(),
'metadata_modified': datetime_now.isoformat(),
}
search.dispatch_by_operation('Package', pkg_dict, 'new')
response = self.solr.query('title:penguin', fq=self.fq)
assert len(response) == 1, len(response)
assert response.results[0]['index_id'] == self._get_index_id (pkg_dict['id'])
assert response.results[0]['title'] == 'penguin'

# looks like solrpy messes with microseconds and time zones,
# so ignore them for testing
assert datetime_now.strftime('%Y-%m-%d %H:%M:%S') == response.results[0]['metadata_created'].strftime('%Y-%m-%d %H:%M:%S')
assert datetime_now.strftime('%Y-%m-%d %H:%M:%S') == response.results[0]['metadata_modified'].strftime('%Y-%m-%d %H:%M:%S')

def test_no_state_not_indexed(self):
pkg_dict = {
Expand All @@ -76,7 +86,9 @@ def test_index_clear(self):
pkg_dict = {
'id': u'penguin-id',
'title': u'penguin',
'state': u'active'
'state': u'active',
'metadata_created': datetime.now().isoformat(),
'metadata_modified': datetime.now().isoformat(),
}
search.dispatch_by_operation('Package', pkg_dict, 'new')
response = self.solr.query('title:penguin', fq=self.fq)
Expand Down

0 comments on commit 2ed1fd9

Please sign in to comment.