diff --git a/mapping/index.go b/mapping/index.go
index e2ac99f39..99642bc40 100644
--- a/mapping/index.go
+++ b/mapping/index.go
@@ -417,23 +417,6 @@ func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimePa
 	return dateTimeParser
 }
 
-func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {
-
-	// first we look for explicit mapping on the field
-	for _, docMapping := range im.TypeMapping {
-		pathMapping, _ := docMapping.documentMappingForPath(path)
-		if pathMapping != nil {
-			if len(pathMapping.Fields) > 0 {
-				if pathMapping.Fields[0].Analyzer != "" {
-					return pathMapping.Fields[0].Analyzer
-				}
-			}
-		}
-	}
-
-	return im.DefaultDateTimeParser
-}
-
 func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) {
 	analyzer, err := im.cache.AnalyzerNamed(analyzerName)
 	if err != nil {
diff --git a/search/query/date_range.go b/search/query/date_range.go
index 34844c976..bbb2a54ef 100644
--- a/search/query/date_range.go
+++ b/search/query/date_range.go
@@ -30,10 +30,10 @@ import (
 	index "github.com/blevesearch/bleve_index_api"
 )
 
-// QueryDateTimeParser controls the default query date time parser
+// QueryDateTimeParser controls the default query date time parser.
 var QueryDateTimeParser = optional.Name
 
-// QueryDateTimeFormat controls the format when Marshaling to JSON
+// QueryDateTimeFormat controls the format when Marshaling to JSON.
 var QueryDateTimeFormat = time.RFC3339
 
 var cache = registry.NewCache()
diff --git a/search/query/date_range_string.go b/search/query/date_range_string.go
new file mode 100644
index 000000000..b5e5c1701
--- /dev/null
+++ b/search/query/date_range_string.go
@@ -0,0 +1,176 @@
+// Copyright (c) 2023 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package query
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"time"
+
+	"github.com/blevesearch/bleve/v2/mapping"
+	"github.com/blevesearch/bleve/v2/numeric"
+	"github.com/blevesearch/bleve/v2/search"
+	"github.com/blevesearch/bleve/v2/search/searcher"
+	index "github.com/blevesearch/bleve_index_api"
+)
+
+// DateRangeStringQuery represents a query for a range of date values.
+// Start and End are the range endpoints, as strings.
+// Start and End are parsed using DateTimeParser, which is a custom date time parser
+// defined in the index mapping. If DateTimeParser is not specified, then the
+// top-level config.QueryDateTimeParser is used.
+type DateRangeStringQuery struct {
+	Start          string `json:"start,omitempty"`
+	End            string `json:"end,omitempty"`
+	InclusiveStart *bool  `json:"inclusive_start,omitempty"`
+	InclusiveEnd   *bool  `json:"inclusive_end,omitempty"`
+	FieldVal       string `json:"field,omitempty"`
+	BoostVal       *Boost `json:"boost,omitempty"`
+	DateTimeParser string `json:"datetime_parser,omitempty"`
+}
+
+// NewDateRangeStringQuery creates a new Query for ranges
+// of date values.
+// Date strings are parsed using the DateTimeParser field of the query struct,
+// which is a custom date time parser defined in the index mapping.
+// If DateTimeParser is not specified, then the
+// top-level config.QueryDateTimeParser is used.
+// Either, but not both, endpoints can be omitted (empty).
+func NewDateRangeStringQuery(start, end string) *DateRangeStringQuery {
+	return NewDateRangeStringInclusiveQuery(start, end, nil, nil)
+}
+
+// NewDateRangeStringInclusiveQuery creates a new Query for ranges
+// of date values.
+// Date strings are parsed using the DateTimeParser field of the query struct,
+// which is a custom date time parser defined in the index mapping.
+// If DateTimeParser is not specified, then the
+// top-level config.QueryDateTimeParser is used.
+// Either, but not both, endpoints can be omitted (empty).
+// startInclusive and endInclusive control inclusion of the endpoints.
+func NewDateRangeStringInclusiveQuery(start, end string, startInclusive, endInclusive *bool) *DateRangeStringQuery {
+	return &DateRangeStringQuery{
+		Start:          start,
+		End:            end,
+		InclusiveStart: startInclusive,
+		InclusiveEnd:   endInclusive,
+	}
+}
+
+func (q *DateRangeStringQuery) SetBoost(b float64) {
+	boost := Boost(b)
+	q.BoostVal = &boost
+}
+
+func (q *DateRangeStringQuery) Boost() float64 {
+	return q.BoostVal.Value()
+}
+
+func (q *DateRangeStringQuery) SetField(f string) {
+	q.FieldVal = f
+}
+
+func (q *DateRangeStringQuery) Field() string {
+	return q.FieldVal
+}
+
+func (q *DateRangeStringQuery) SetDateTimeParser(d string) {
+	q.DateTimeParser = d
+}
+
+func (q *DateRangeStringQuery) DateTimeParserName() string {
+	return q.DateTimeParser
+}
+
+func (q *DateRangeStringQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
+	field := q.FieldVal
+	if q.FieldVal == "" {
+		field = m.DefaultSearchField()
+	}
+
+	dateTimeParserName := QueryDateTimeParser
+	if q.DateTimeParser != "" {
+		dateTimeParserName = q.DateTimeParser
+	}
+	dateTimeParser := m.DateTimeParserNamed(dateTimeParserName)
+	if dateTimeParser == nil {
+		return nil, fmt.Errorf("no dateTimeParser named '%s' registered", dateTimeParserName)
+	}
+
+	var startTime, endTime time.Time
+	var err error
+	if q.Start != "" {
+		startTime, _, err = dateTimeParser.ParseDateTime(q.Start)
+		if err != nil {
+			return nil, fmt.Errorf("%v, date time parser name: %s", err, dateTimeParserName)
+		}
+	}
+	if q.End != "" {
+		endTime, _, err = dateTimeParser.ParseDateTime(q.End)
+		if err != nil {
+			return nil, fmt.Errorf("%v, date time parser name: %s", err, dateTimeParserName)
+		}
+	}
+
+	min, max, err := q.parseEndpoints(startTime, endTime)
+	if err != nil {
+		return nil, err
+	}
+	return searcher.NewNumericRangeSearcher(ctx, i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
+}
+
+func (q *DateRangeStringQuery) parseEndpoints(startTime, endTime time.Time) (*float64, *float64, error) {
+	min := math.Inf(-1)
+	max := math.Inf(1)
+
+	if startTime.IsZero() && endTime.IsZero() {
+		return nil, nil, fmt.Errorf("date range query must specify at least one of start/end")
+	}
+
+	if !startTime.IsZero() {
+		if !isDateTimeWithinRange(startTime) {
+			// overflow
+			return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
+		}
+		startInt64 := startTime.UnixNano()
+		min = numeric.Int64ToFloat64(startInt64)
+	}
+	if !endTime.IsZero() {
+		if !isDateTimeWithinRange(endTime) {
+			// overflow
+			return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
+		}
+		endInt64 := endTime.UnixNano()
+		max = numeric.Int64ToFloat64(endInt64)
+	}
+
+	return &min, &max, nil
+}
+
+func (q *DateRangeStringQuery) Validate() error {
+	// either start or end must be specified
+	if q.Start == "" && q.End == "" {
+		return fmt.Errorf("date range query must specify at least one of start/end")
+	}
+	return nil
+}
+
+func isDateTimeWithinRange(t time.Time) bool {
+	if t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime) {
+		return false
+	}
+	return true
+}
diff --git a/search/query/query.go b/search/query/query.go
index a4e0f015a..1ef97ff8a 100644
--- a/search/query/query.go
+++ b/search/query/query.go
@@ -185,7 +185,7 @@ func ParseQuery(input []byte) (Query, error) {
 		_, hasStart := tmp["start"]
 		_, hasEnd := tmp["end"]
 		if hasStart || hasEnd {
-			var rv DateRangeQuery
+			var rv DateRangeStringQuery
 			err := json.Unmarshal(input, &rv)
 			if err != nil {
 				return nil, err
diff --git a/search/query/query_test.go b/search/query/query_test.go
index 0082e8acf..228fb65cd 100644
--- a/search/query/query_test.go
+++ b/search/query/query_test.go
@@ -176,7 +176,7 @@ func TestParseQuery(t *testing.T) {
 		{
 			input: []byte(`{"start":"` + startDateStr + `","end":"` + endDateStr + `","field":"desc"}`),
 			output: func() Query {
-				q := NewDateRangeQuery(startDate, endDate)
+				q := NewDateRangeStringQuery(startDateStr, endDateStr)
 				q.SetField("desc")
 				return q
 			}(),
diff --git a/search_test.go b/search_test.go
index 414c907c8..6ddb861a9 100644
--- a/search_test.go
+++ b/search_test.go
@@ -2473,3 +2473,297 @@ func TestCustomDateTimeParserLayoutValidation(t *testing.T) {
 		}
 	}
 }
+
+func TestDateRangeStringQuery(t *testing.T) {
+	idxMapping := NewIndexMapping()
+
+	err := idxMapping.AddCustomDateTimeParser("customDT", map[string]interface{}{
+		"type": sanitized.Name,
+		"layouts": []interface{}{
+			"02/01/2006 15:04:05",
+			"2006/01/02 3:04PM",
+		},
+	})
+
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	err = idxMapping.AddCustomDateTimeParser("queryDT", map[string]interface{}{
+		"type": sanitized.Name,
+		"layouts": []interface{}{
+			"02/01/2006 3:04PM",
+		},
+	})
+
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	dtmap := NewDateTimeFieldMapping()
+	dtmap.DateFormat = "customDT"
+	idxMapping.DefaultMapping.AddFieldMappingsAt("date", dtmap)
+
+	tmpIndexPath := createTmpIndexPath(t)
+	defer cleanupTmpIndexPath(t, tmpIndexPath)
+
+	idx, err := New(tmpIndexPath, idxMapping)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		err = idx.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+	documents := map[string]map[string]interface{}{
+		"doc1": {
+			"date": "2001/08/20 6:00PM",
+		},
+		"doc2": {
+			"date": "20/08/2001 18:00:20",
+		},
+		"doc3": {
+			"date": "20/08/2001 18:10:00",
+		},
+		"doc4": {
+			"date": "2001/08/20 6:15PM",
+		},
+		"doc5": {
+			"date": "20/08/2001 18:20:00",
+		},
+	}
+
+	batch := idx.NewBatch()
+	for docID, doc := range documents {
+		err := batch.Index(docID, doc)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	err = idx.Batch(batch)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	type testResult struct {
+		docID    string // doc ID of the hit
+		hitField string // fields returned as part of the hit
+	}
+
+	type testStruct struct {
+		start          string
+		end            string
+		field          string
+		dateTimeParser string // name of the custom date time parser to use; if empty, QueryDateTimeParser is used
+		includeStart   bool
+		includeEnd     bool
+		expectedHits   []testResult
+		err            error
+	}
+
+	testQueries := []testStruct{
+		// test cases with RFC3339 parser and toggling includeStart and includeEnd
+		{
+			start:        "2001-08-20T18:00:00",
+			end:          "2001-08-20T18:10:00",
+			field:        "date",
+			includeStart: true,
+			includeEnd:   true,
+			expectedHits: []testResult{
+				{
+					docID:    "doc1",
+					hitField: "2001/08/20 6:00PM",
+				},
+				{
+					docID:    "doc2",
+					hitField: "20/08/2001 18:00:20",
+				},
+				{
+					docID:    "doc3",
+					hitField: "20/08/2001 18:10:00",
+				},
+			},
+		},
+		{
+			start:        "2001-08-20T18:00:00",
+			end:          "2001-08-20T18:10:00",
+			field:        "date",
+			includeStart: false,
+			includeEnd:   true,
+			expectedHits: []testResult{
+				{
+					docID:    "doc2",
+					hitField: "20/08/2001 18:00:20",
+				},
+				{
+					docID:    "doc3",
+					hitField: "20/08/2001 18:10:00",
+				},
+			},
+		},
+		{
+			start:        "2001-08-20T18:00:00",
+			end:          "2001-08-20T18:10:00",
+			field:        "date",
+			includeStart: false,
+			includeEnd:   false,
+			expectedHits: []testResult{
+				{
+					docID:    "doc2",
+					hitField: "20/08/2001 18:00:20",
+				},
+			},
+		},
+		// test cases with custom parser and omitting start and end
+		{
+			start:          "20/08/2001 18:00:00",
+			end:            "2001/08/20 6:10PM",
+			field:          "date",
+			dateTimeParser: "customDT",
+			includeStart:   true,
+			includeEnd:     true,
+			expectedHits: []testResult{
+				{
+					docID:    "doc1",
+					hitField: "2001/08/20 6:00PM",
+				},
+				{
+					docID:    "doc2",
+					hitField: "20/08/2001 18:00:20",
+				},
+				{
+					docID:    "doc3",
+					hitField: "20/08/2001 18:10:00",
+				},
+			},
+		},
+		{
+			end:            "20/08/2001 18:15:00",
+			field:          "date",
+			dateTimeParser: "customDT",
+			includeStart:   true,
+			includeEnd:     true,
+			expectedHits: []testResult{
+				{
+					docID:    "doc1",
+					hitField: "2001/08/20 6:00PM",
+				},
+				{
+					docID:    "doc2",
+					hitField: "20/08/2001 18:00:20",
+				},
+				{
+					docID:    "doc3",
+					hitField: "20/08/2001 18:10:00",
+				},
+				{
+					docID:    "doc4",
+					hitField: "2001/08/20 6:15PM",
+				},
+			},
+		},
+		{
+			start:          "2001/08/20 6:15PM",
+			field:          "date",
+			dateTimeParser: "customDT",
+			includeStart:   true,
+			includeEnd:     true,
+			expectedHits: []testResult{
+				{
+					docID:    "doc4",
+					hitField: "2001/08/20 6:15PM",
+				},
+				{
+					docID:    "doc5",
+					hitField: "20/08/2001 18:20:00",
+				},
+			},
+		},
+		{
+			start:          "20/08/2001 6:15PM",
+			field:          "date",
+			dateTimeParser: "queryDT",
+			includeStart:   true,
+			includeEnd:     true,
+			expectedHits: []testResult{
+				{
+					docID:    "doc4",
+					hitField: "2001/08/20 6:15PM",
+				},
+				{
+					docID:    "doc5",
+					hitField: "20/08/2001 18:20:00",
+				},
+			},
+		},
+		// error path test cases
+		{
+			field:          "date",
+			dateTimeParser: "customDT",
+			includeStart:   true,
+			includeEnd:     true,
+			err:            fmt.Errorf("date range query must specify at least one of start/end"),
+		},
+		{
+			field:        "date",
+			includeStart: true,
+			includeEnd:   true,
+			err:          fmt.Errorf("date range query must specify at least one of start/end"),
+		},
+		{
+			start:          "2001-08-20T18:00:00",
+			end:            "2001-08-20T18:10:00",
+			field:          "date",
+			dateTimeParser: "customDT",
+			err:            fmt.Errorf("unable to parse datetime with any of the layouts, date time parser name: customDT"),
+		},
+		{
+			start: "3001-08-20T18:00:00",
+			end:   "2001-08-20T18:10:00",
+			field: "date",
+			err:   fmt.Errorf("invalid/unsupported date range, start: 3001-08-20T18:00:00"),
+		},
+		{
+			start:          "2001/08/20 6:00PM",
+			end:            "3001/08/20 6:30PM",
+			field:          "date",
+			dateTimeParser: "customDT",
+			err:            fmt.Errorf("invalid/unsupported date range, end: 3001/08/20 6:30PM"),
+		},
+	}
+
+	for _, dtq := range testQueries {
+		var err error
+		dateQuery := query.NewDateRangeStringInclusiveQuery(dtq.start, dtq.end, &dtq.includeStart, &dtq.includeEnd)
+		dateQuery.SetDateTimeParser(dtq.dateTimeParser)
+		dateQuery.SetField(dtq.field)
+
+		sr := NewSearchRequest(dateQuery)
+		sr.SortBy([]string{dtq.field})
+		sr.Fields = []string{dtq.field}
+
+		res, err := idx.Search(sr)
+		if err != nil {
+			if dtq.err == nil {
+				t.Fatalf("expected no error, got: %v", err)
+			}
+			if dtq.err.Error() != err.Error() {
+				t.Fatalf("expected error: %v, got: %v", dtq.err, err)
+			}
+			continue
+		}
+		if len(res.Hits) != len(dtq.expectedHits) {
+			t.Fatalf("expected %d hits, got %d", len(dtq.expectedHits), len(res.Hits))
+		}
+		for i, hit := range res.Hits {
+			if hit.ID != dtq.expectedHits[i].docID {
+				t.Fatalf("expected docID %s, got %s", dtq.expectedHits[i].docID, hit.ID)
+			}
+			if hit.Fields[dtq.field].(string) != dtq.expectedHits[i].hitField {
+				t.Fatalf("expected hit field %s, got %s", dtq.expectedHits[i].hitField, hit.Fields[dtq.field])
+			}
+		}
+	}
+}