Skip to content

Commit

Permalink
Fix issue in parsing datetime field (#2030)
Browse files Browse the repository at this point in the history
Addresses #2027

In bleve v2.3.10, [a bug
fix](#1868) was introduced
where the value of a datetime field in the Fields part of a search hit
was made to match the value actually present in the document.
- For example if a document A was indexed and had value of a datetime
field X to be "2001/08/20 03:00:10", and ifA was returned as part of the
search result, A's Fields will have a map which would look like
- Before Fix 
    - "X" : "2001-08-20T03:00:10Z"
- After fix 
    - "X" : "2001/08/20 03:00:10"

This would come to play in case of custom user defined date time parser,
and we did this by storing the layout with which the time string was
parsed with during indexing. With timestamp support being added at the
same time, they did not really have a valid layout to store and hence
stored nil. During queries, the layout extracted, if null, was always
assumed to be for the timestamp case _but_ the case where an existing
index from pre v2.3.10 (which would also return null layout) was not
handled and it became a catchall case leading to the datetime string
being always returned in a timestamp rather than the expected default
RFC3339.
  • Loading branch information
CascadingRadium committed May 3, 2024
1 parent c76f76d commit 490c4b9
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 74 deletions.
2 changes: 1 addition & 1 deletion analysis/datetime/timestamp/microseconds/microseconds.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error)
if timestamp < minBound || timestamp > maxBound {
return time.Time{}, "", analysis.ErrInvalidTimestampRange
}
return time.UnixMicro(timestamp), "", nil
return time.UnixMicro(timestamp), Name, nil
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
Expand Down
2 changes: 1 addition & 1 deletion analysis/datetime/timestamp/milliseconds/milliseconds.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error)
if timestamp < minBound || timestamp > maxBound {
return time.Time{}, "", analysis.ErrInvalidTimestampRange
}
return time.UnixMilli(timestamp), "", nil
return time.UnixMilli(timestamp), Name, nil
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
Expand Down
2 changes: 1 addition & 1 deletion analysis/datetime/timestamp/nanoseconds/nanoseconds.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error)
if timestamp < minBound || timestamp > maxBound {
return time.Time{}, "", analysis.ErrInvalidTimestampRange
}
return time.Unix(0, timestamp), "", nil
return time.Unix(0, timestamp), Name, nil
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
Expand Down
2 changes: 1 addition & 1 deletion analysis/datetime/timestamp/seconds/seconds.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error)
if timestamp < minBound || timestamp > maxBound {
return time.Time{}, "", analysis.ErrInvalidTimestampRange
}
return time.Unix(timestamp, 0), "", nil
return time.Unix(timestamp, 0), Name, nil
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
Expand Down
25 changes: 22 additions & 3 deletions http/doc_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ import (
"fmt"
"net/http"
"strconv"
"time"

"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds"
index "github.com/blevesearch/bleve_index_api"
)

Expand Down Expand Up @@ -91,10 +96,24 @@ func (h *DocGetHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
d, layout, err := field.DateTime()
if err == nil {
if layout == "" {
// layout not set probably means it was indexed as a timestamp
newval = strconv.FormatInt(d.UnixNano(), 10)
// missing layout means we fallback to
// the default layout which is RFC3339
newval = d.Format(time.RFC3339)
} else {
newval = d.Format(layout)
// the layout here can now either be representative
// of an actual layout or a timestamp
switch layout {
case seconds.Name:
newval = strconv.FormatInt(d.Unix(), 10)
case milliseconds.Name:
newval = strconv.FormatInt(d.UnixMilli(), 10)
case microseconds.Name:
newval = strconv.FormatInt(d.UnixMicro(), 10)
case nanoseconds.Name:
newval = strconv.FormatInt(d.UnixNano(), 10)
default:
newval = d.Format(layout)
}
}
}
}
Expand Down
28 changes: 25 additions & 3 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ import (
"sync/atomic"
"time"

"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
Expand Down Expand Up @@ -738,10 +742,28 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
datetime, layout, err := docF.DateTime()
if err == nil {
if layout == "" {
// layout not set probably means it was indexed as a timestamp
value = strconv.FormatInt(datetime.UnixNano(), 10)
// missing layout means we fallback to
// the default layout which is RFC3339
value = datetime.Format(time.RFC3339)
} else {
value = datetime.Format(layout)
// the layout here can now either be representative
// of an actual datetime layout or a timestamp
switch layout {
case seconds.Name:
value = strconv.FormatInt(datetime.Unix(), 10)
case milliseconds.Name:
value = strconv.FormatInt(datetime.UnixMilli(), 10)
case microseconds.Name:
value = strconv.FormatInt(datetime.UnixMicro(), 10)
case nanoseconds.Name:
value = strconv.FormatInt(datetime.UnixNano(), 10)
default:
// the layout for formatting the date to a string
// is provided by a datetime parser which is not
// handling the timestamp case, hence the layout
// can be directly used to format the date
value = datetime.Format(layout)
}
}
}
case index.BooleanField:
Expand Down
93 changes: 29 additions & 64 deletions search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3116,7 +3116,7 @@ func TestDateRangeTimestampQueries(t *testing.T) {
}
}()

documents := map[string]map[string]interface{}{
documents := map[string]map[string]string{
"doc1": {
"date": "2001/08/20 03:00:10",
"seconds": "998276410",
Expand Down Expand Up @@ -3166,99 +3166,59 @@ func TestDateRangeTimestampQueries(t *testing.T) {
t.Fatal(err)
}

type testResult struct {
docID string // doc ID of the hit
hitField string // fields returned as part of the hit
}
type testStruct struct {
start string
end string
field string
expectedHits []testResult
expectedHits []string
}

testQueries := []testStruct{
{
start: "2001-08-20T03:00:05",
end: "2001-08-20T03:00:25",
field: "date",
expectedHits: []testResult{
{
docID: "doc1",
hitField: "2001/08/20 03:00:10",
},
{
docID: "doc2",
hitField: "2001/08/20 03:00:20",
},
expectedHits: []string{
"doc1",
"doc2",
},
},
{
start: "2001-08-20T03:00:15",
end: "2001-08-20T03:00:35",
field: "seconds",
expectedHits: []testResult{
{
docID: "doc2",
hitField: "998276420000000000",
},
{
docID: "doc3",
hitField: "998276430000000000",
},
expectedHits: []string{
"doc2",
"doc3",
},
},
{
start: "2001-08-20T03:00:10.150",
end: "2001-08-20T03:00:10.450",
field: "milliseconds",
expectedHits: []testResult{
{
docID: "doc2",
hitField: "998276410200000000",
},
{
docID: "doc3",
hitField: "998276410300000000",
},
{
docID: "doc4",
hitField: "998276410400000000",
},
expectedHits: []string{
"doc2",
"doc3",
"doc4",
},
},
{
start: "2001-08-20T03:00:10.100450",
end: "2001-08-20T03:00:10.100650",
field: "microseconds",
expectedHits: []testResult{
{
docID: "doc3",
hitField: "998276410100500000",
},
{
docID: "doc4",
hitField: "998276410100600000",
},
expectedHits: []string{
"doc3",
"doc4",
},
},
{
start: "2001-08-20T03:00:10.100300550",
end: "2001-08-20T03:00:10.100300850",
field: "nanoseconds",
expectedHits: []testResult{
{
docID: "doc3",
hitField: "998276410100300600",
},
{
docID: "doc4",
hitField: "998276410100300700",
},
{
docID: "doc5",
hitField: "998276410100300800",
},
expectedHits: []string{
"doc3",
"doc4",
"doc5",
},
},
}
Expand All @@ -3277,7 +3237,7 @@ func TestDateRangeTimestampQueries(t *testing.T) {

sr := NewSearchRequest(drq)
sr.SortBy([]string{dtq.field})
sr.Fields = []string{dtq.field}
sr.Fields = []string{"*"}

res, err := idx.Search(sr)
if err != nil {
Expand All @@ -3287,11 +3247,16 @@ func TestDateRangeTimestampQueries(t *testing.T) {
t.Fatalf("expected %d hits, got %d", len(dtq.expectedHits), len(res.Hits))
}
for i, hit := range res.Hits {
if hit.ID != dtq.expectedHits[i].docID {
t.Fatalf("expected docID %s, got %s", dtq.expectedHits[i].docID, hit.ID)
if hit.ID != dtq.expectedHits[i] {
t.Fatalf("expected docID %s, got %s", dtq.expectedHits[i], hit.ID)
}
if hit.Fields[dtq.field].(string) != dtq.expectedHits[i].hitField {
t.Fatalf("expected hit field %s, got %s", dtq.expectedHits[i].hitField, hit.Fields[dtq.field])
if len(hit.Fields) != len(documents[hit.ID]) {
t.Fatalf("expected hit %s to have %d fields, got %d", hit.ID, len(documents[hit.ID]), len(hit.Fields))
}
for k, v := range documents[hit.ID] {
if hit.Fields[k] != v {
t.Fatalf("expected field %s to be %s, got %s", k, v, hit.Fields[k])
}
}
}
}
Expand Down

0 comments on commit 490c4b9

Please sign in to comment.