Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug on elastic search #12811

Merged
merged 6 commits into from
Sep 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 36 additions & 15 deletions modules/indexer/code/elastic_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ const (
},
"content": {
"type": "text",
"term_vector": "with_positions_offsets",
"index": true
},
"commit_id": {
Expand Down Expand Up @@ -251,6 +252,22 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error {
return err
}

// indexPos locates the first occurrence of start in content and the first
// occurrence of end that follows that start. It returns the byte offset at
// which start begins and the byte offset just past the matched end.
// If either string cannot be found, it returns -1, -1.
func indexPos(content, start, end string) (int, int) {
	startIdx := strings.Index(content, start)
	if startIdx < 0 {
		return -1, -1
	}
	// Only look for end after the matched start, so that an earlier
	// occurrence of end is never paired with this start.
	afterStart := startIdx + len(start)
	endIdx := strings.Index(content[afterStart:], end)
	if endIdx < 0 {
		return -1, -1
	}
	// endIdx is relative to afterStart; translate it back into an offset in
	// content that points just past the end string.
	return startIdx, afterStart + endIdx + len(end)
}

func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
hits := make([]*SearchResult, 0, pageSize)
for _, hit := range searchResult.Hits.Hits {
Expand All @@ -260,18 +277,12 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
var startIndex, endIndex int = -1, -1
c, ok := hit.Highlight["content"]
if ok && len(c) > 0 {
var subStr = make([]rune, 0, len(kw))
startIndex = strings.IndexFunc(c[0], func(r rune) bool {
if len(subStr) >= len(kw) {
subStr = subStr[1:]
}
subStr = append(subStr, r)
return strings.EqualFold(kw, string(subStr))
})
if startIndex > -1 {
endIndex = startIndex + len(kw)
} else {
panic(fmt.Sprintf("1===%#v", hit.Highlight))
// FIXME: The highlighted content wraps the matched keywords in <em> and </em>,
// so we find the positions by locating those tags. But how do we avoid HTML content
// that itself contains <em> and </em> tags? Does Elasticsearch already handle that?
startIndex, endIndex = indexPos(c[0], "<em>", "</em>")
if startIndex == -1 {
panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0]))
}
} else {
panic(fmt.Sprintf("2===%#v", hit.Highlight))
Expand All @@ -293,7 +304,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
Language: language,
StartIndex: startIndex,
EndIndex: endIndex,
EndIndex: endIndex - 9, // subtract the length of "<em></em>" since Content is given the original data
Color: enry.GetColor(language),
})
}
Expand Down Expand Up @@ -347,7 +358,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
Index(b.indexerAliasName).
Aggregation("language", aggregation).
Query(query).
Highlight(elastic.NewHighlight().Field("content")).
Highlight(
elastic.NewHighlight().
Field("content").
NumOfFragments(0). // 0 returns the whole highlighted content instead of fragments
HighlighterType("fvh"),
).
Sort("repo_id", true).
From(start).Size(pageSize).
Do(context.Background())
Expand All @@ -373,7 +389,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
searchResult, err := b.client.Search().
Index(b.indexerAliasName).
Query(query).
Highlight(elastic.NewHighlight().Field("content")).
Highlight(
elastic.NewHighlight().
Field("content").
NumOfFragments(0). // 0 returns the whole highlighted content instead of fragments
HighlighterType("fvh"),
).
Sort("repo_id", true).
From(start).Size(pageSize).
Do(context.Background())
Expand Down
6 changes: 6 additions & 0 deletions modules/indexer/code/elastic_search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ func TestESIndexAndSearch(t *testing.T) {

testIndexer("elastic_search", t, indexer)
}

// TestIndexPos checks that indexPos reports where the start word begins and
// where the first end word following it finishes.
func TestIndexPos(t *testing.T) {
	const content = "test index start and end"

	begin, finish := indexPos(content, "start", "end")

	assert.EqualValues(t, 11, begin)
	assert.EqualValues(t, 24, finish)
}