Skip to content

Commit

Permalink
Merge pull request #7775 from ckan/solr-indexing-dates
Browse files (browse the repository at this point in the history)
Improve handling of date fields in Solr
  • Loading branch information
kowh-ai committed Sep 11, 2023
2 parents 18df17b + a716af2 commit 5abd3c2
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 16 deletions.
1 change: 1 addition & 0 deletions changes/7775.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix error when indexing a full ISO date with timezone info
26 changes: 10 additions & 16 deletions ckan/lib/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
import logging
import collections
import json
import datetime
import re
from dateutil.parser import parse
from dateutil.parser import parse, ParserError as DateParserError
from typing import Any, NoReturn, Optional

import six
Expand Down Expand Up @@ -235,27 +234,22 @@ def index_package(self,
pkg_dict['dataset_type'] = pkg_dict['type']

# clean the dict fixing keys and dates
# FIXME where are we getting these dirty keys from? can we not just
# fix them in the correct place or is this something that always will
# be needed? For my data not changing the keys seems to not cause a
# problem.
new_dict = {}
bogus_date = datetime.datetime(1, 1, 1)
for key, value in pkg_dict.items():
key = six.ensure_str(key)
if key.endswith('_date'):
if not value:
continue
try:
date = parse(value, default=bogus_date)
if date != bogus_date:
value = date.isoformat() + 'Z'
else:
# The date field was empty, so dateutil filled it with
# the default bogus date
value = None
except (IndexError, TypeError, ValueError):
log.error('%r: %r value of %r is not a valid date', pkg_dict['id'], key, value)
date = parse(value)
value = date.isoformat()
if not date.tzinfo:
value += 'Z'
except DateParserError:
log.warning('%r: %r value of %r is not a valid date', pkg_dict['id'], key, value)
continue
new_dict[key] = value

pkg_dict = new_dict

for k in ('title', 'notes', 'title_string'):
Expand Down
26 changes: 26 additions & 0 deletions ckan/tests/lib/search/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def test_index_date_field(self):
"extras": [
{"key": "test_date", "value": "2014-03-22"},
{"key": "test_tim_date", "value": "2014-03-22 05:42:14"},
{"key": "test_full_iso_date", "value": "2019-10-10T01:15:00Z"},
]
}
)
Expand All @@ -142,6 +143,31 @@ def test_index_date_field(self):
response.docs[0]["test_tim_date"].strftime("%Y-%m-%d %H:%M:%S")
== "2014-03-22 05:42:14"
)
assert (
response.docs[0]["test_full_iso_date"].strftime("%Y-%m-%d %H:%M:%S")
== "2019-10-10 01:15:00"
)

def test_index_date_empty_value(self):
    """Index a package whose ``*_date`` extras are empty or ``None``.

    The package must still be found by the search, and neither of the
    blank date keys may appear in the returned document.
    """
    blank_date_extras = [
        {"key": "test_empty_date", "value": ""},
        {"key": "test_none_date", "value": None},
    ]
    dataset = self.base_package_dict.copy()
    dataset.update({"extras": blank_date_extras})

    self.package_index.index_package(dataset)

    found = self.solr_client.search(q="name:monkey", fq=self.fq)
    assert len(found) == 1

    # Both blank values are expected to be absent from the indexed document.
    indexed_doc = found.docs[0]
    for field_name in ("test_empty_date", "test_none_date"):
        assert field_name not in indexed_doc

def test_index_date_field_wrong_value(self):

Expand Down

0 comments on commit 5abd3c2

Please sign in to comment.