Skip to content

Commit

Permalink
David leifker/elasticsearch optimization ext (#6920)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker committed Jan 2, 2023
1 parent 4915420 commit 656ceb6
Show file tree
Hide file tree
Showing 27 changed files with 152 additions and 96 deletions.
1 change: 1 addition & 0 deletions docker/datahub-gms/env/docker-without-neo4j.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-gms
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mysql:3306
Expand Down
1 change: 1 addition & 0 deletions docker/datahub-gms/env/docker.cassandra.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-gms
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
Expand Down
1 change: 1 addition & 0 deletions docker/datahub-gms/env/docker.mariadb.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-gms
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mariadb:3306
Expand Down
1 change: 1 addition & 0 deletions docker/datahub-gms/env/docker.postgres.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-gms
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=postgres:5432
Expand Down
1 change: 1 addition & 0 deletions docker/datahub-mae-consumer/env/docker-without-neo4j.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-mcl
DATAHUB_GMS_HOST=datahub-gms
DATAHUB_GMS_PORT=8080

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ services:
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-gms
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ services:
datahub-mae-consumer:
container_name: datahub-mae-consumer
environment:
- BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-bihe-consumer-job-client-mcl
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- MAE_CONSUMER_ENABLED=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.linkedin.metadata.query.AutoCompleteResult;
import com.linkedin.metadata.query.ListResult;
import com.linkedin.metadata.query.ListUrnsResult;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.query.filter.SortCriterion;
import com.linkedin.metadata.entity.AspectUtils;
Expand Down Expand Up @@ -248,15 +249,15 @@ public SearchResult search(
int start,
int count,
@Nonnull Authentication authentication,
@Nullable Boolean structured)
@Nullable Boolean fulltext)
throws RemoteInvocationException {

if (Optional.ofNullable(structured).orElse(true)) {
if (Optional.ofNullable(fulltext).orElse(false)) {
return ValidationUtils.validateSearchResult(
_entitySearchService.structuredSearch(entity, input, newFilter(requestFilters), null, start, count), _entityService);
_entitySearchService.fullTextSearch(entity, input, newFilter(requestFilters), null, start, count), _entityService);
} else {
return ValidationUtils.validateSearchResult(
_entitySearchService.fullTextSearch(entity, input, newFilter(requestFilters), null, start, count), _entityService);
_entitySearchService.structuredSearch(entity, input, newFilter(requestFilters), null, start, count), _entityService);
}
}

Expand Down Expand Up @@ -305,15 +306,15 @@ public SearchResult search(
int start,
int count,
@Nonnull Authentication authentication,
@Nullable Boolean structured)
@Nullable Boolean fulltext)
throws RemoteInvocationException {
if (Optional.ofNullable(structured).orElse(true)) {
if (Optional.ofNullable(fulltext).orElse(false)) {
return ValidationUtils.validateSearchResult(
_entitySearchService.structuredSearch(entity, input, filter, sortCriterion, start, count),
_entitySearchService.fullTextSearch(entity, input, filter, sortCriterion, start, count),
_entityService);
} else {
return ValidationUtils.validateSearchResult(
_entitySearchService.fullTextSearch(entity, input, filter, sortCriterion, start, count),
_entitySearchService.structuredSearch(entity, input, filter, sortCriterion, start, count),
_entityService);
}
}
Expand All @@ -338,7 +339,8 @@ public SearchResult searchAcrossEntities(
int count,
@Nonnull final Authentication authentication) throws RemoteInvocationException {
return ValidationUtils.validateSearchResult(
_searchService.searchAcrossEntities(entities, input, filter, null, start, count, null), _entityService);
_searchService.searchAcrossEntities(entities, input, filter, null, start, count,
new SearchFlags().setFulltext(true)), _entityService);
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ public SearchResult searchAcrossEntities(@Nonnull List<String> entities, @Nonnul
log.debug(String.format(
"Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s",
entities, input, postFilters, sortCriterion, from, size));
SearchFlags forceFlags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()).setStructured(false);
return _cachingAllEntitiesSearchAggregator.getSearchResults(entities, input, postFilters, sortCriterion, from, size, forceFlags);
return _cachingAllEntitiesSearchAggregator.getSearchResults(entities, input, postFilters, sortCriterion, from, size, searchFlags);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public SearchResult getCachedSearchResults(
cacheManager.getCache(ENTITY_SEARCH_SERVICE_SEARCH_CACHE_NAME),
batchSize,
querySize -> getRawSearchResults(entityName, query, filters, sortCriterion, querySize.getFrom(),
querySize.getSize(), searchFlags.isStructured()),
querySize.getSize(), Boolean.TRUE.equals(searchFlags.isFulltext())),
querySize -> Quintet.with(entityName, query, filters, sortCriterion, querySize), flags, enableCache).getSearchResults(from, size);
}

Expand Down Expand Up @@ -197,17 +197,17 @@ private SearchResult getRawSearchResults(
final SortCriterion sortCriterion,
final int start,
final int count,
final boolean structured) {
if (structured) {
return entitySearchService.structuredSearch(
final boolean fulltext) {
if (fulltext) {
return entitySearchService.fullTextSearch(
entityName,
input,
filters,
sortCriterion,
start,
count);
} else {
return entitySearchService.fullTextSearch(
return entitySearchService.structuredSearch(
entityName,
input,
filters,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public SearchResult fullTextSearch(@Nonnull String entityName, @Nonnull String i
log.debug(String.format(
"Searching FullText Search documents entityName: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s",
entityName, input, postFilters, sortCriterion, from, size));
return esSearchDAO.search(entityName, input, postFilters, sortCriterion, from, size, false);
return esSearchDAO.search(entityName, input, postFilters, sortCriterion, from, size, true);
}

@Nonnull
Expand All @@ -118,7 +118,7 @@ public SearchResult structuredSearch(@Nonnull String entityName, @Nonnull String
log.debug(String.format(
"Searching Structured Search documents entityName: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s",
entityName, input, postFilters, sortCriterion, from, size));
return esSearchDAO.search(entityName, input, postFilters, sortCriterion, from, size, true);
return esSearchDAO.search(entityName, input, postFilters, sortCriterion, from, size, false);
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class SettingsBuilder {
public static final String NORMALIZER = "normalizer";
public static final String PATTERN = "pattern";
public static final String PATTERNS = "patterns";
public static final String REPLACEMENT = "replacement";
public static final String PRESERVE_ORIGINAL = "preserve_original";
public static final String SEARCH_ANALYZER = "search_analyzer";
public static final String SPLIT_ON_NUMERICS = "split_on_numerics";
Expand Down Expand Up @@ -69,6 +70,7 @@ public class SettingsBuilder {
public static final String FLATTEN_GRAPH = "flatten_graph";
public static final String LOWERCASE = "lowercase";
public static final String MIN_LENGTH_2 = "min_length_2";
public static final String REPLACE_NUM_LENGTH_3 = "replace_num_length_3";
public static final String MULTIFILTER = "multifilter";
public static final String MULTIFILTER_GRAPH = "multifilter_graph";
public static final String PARTIAL_URN_COMPONENT = "partial_urn_component";
Expand Down Expand Up @@ -100,6 +102,7 @@ public class SettingsBuilder {
public static final String SLASH_TOKENIZER = "slash_tokenizer";

public static final List<String> ALPHA_ONLY_PATTERNS = ImmutableList.of("([a-z0-9]{2,})");
public static final String NUM_LENGTH_3_PATTERN = "(^[0-9]{1,3}$)";
public static final List<String> URN_STOP_WORDS = ImmutableList.of("urn", "li");

public final Map<String, Object> settings;
Expand Down Expand Up @@ -136,7 +139,7 @@ private static Map<String, Object> buildFilters() throws IOException {
// Filter to split string into words
filters.put(CUSTOM_DELIMITER, ImmutableMap.<String, Object>builder()
.put(TYPE, WORD_DELIMITER)
.put(SPLIT_ON_NUMERICS, false)
.put(SPLIT_ON_NUMERICS, true)
.put(PRESERVE_ORIGINAL, true)
.put(TYPE_TABLE, ImmutableList.of(
COLON_SUBWORD_DELIMITER
Expand All @@ -145,7 +148,7 @@ private static Map<String, Object> buildFilters() throws IOException {

filters.put(CUSTOM_DELIMITER_GRAPH, ImmutableMap.<String, Object>builder()
.put(TYPE, WORD_DELIMITER_GRAPH)
.put(SPLIT_ON_NUMERICS, false)
.put(SPLIT_ON_NUMERICS, true)
.put(PRESERVE_ORIGINAL, true)
.put(TYPE_TABLE, ImmutableList.of(
COLON_SUBWORD_DELIMITER
Expand Down Expand Up @@ -187,6 +190,12 @@ private static Map<String, Object> buildFilters() throws IOException {
.put(PATTERNS, ALPHA_ONLY_PATTERNS)
.build());

filters.put(REPLACE_NUM_LENGTH_3, ImmutableMap.builder()
.put(TYPE, "pattern_replace")
.put(PATTERN, NUM_LENGTH_3_PATTERN)
.put(REPLACEMENT, "")
.build());

filters.put(SHINGLE_2_3, ImmutableMap.<String, Object>builder()
.put(TYPE, "shingle")
.put(MIN_SHINGLE_SIZE, "2")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,18 @@ private SearchResult executeAndExtract(@Nonnull EntitySpec entitySpec, @Nonnull
* @param sortCriterion {@link SortCriterion} to be applied to search results
* @param from index to start the search from
* @param size the number of search hits to return
* @param structured Structured or full text search modes
* @param fulltext {@code true} to run a full text search, {@code false} to run a structured search
* @return a {@link com.linkedin.metadata.dao.SearchResult} that contains a list of matched documents and related search result metadata
*/
@Nonnull
public SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters,
@Nullable SortCriterion sortCriterion, int from, int size, boolean structured) {
@Nullable SortCriterion sortCriterion, int from, int size, boolean fulltext) {
final String finalInput = input.isEmpty() ? "*" : input;
Timer.Context searchRequestTimer = MetricUtils.timer(this.getClass(), "searchRequest").time();
EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName);
// Step 1: construct the query
final SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpec)
.getSearchRequest(finalInput, postFilters, sortCriterion, from, size, structured);
.getSearchRequest(finalInput, postFilters, sortCriterion, from, size, fulltext);
searchRequest.indices(indexConvention.getIndexName(entitySpec));
searchRequestTimer.stop();
// Step 2: execute the query and extract results, validated against document model as well
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ public class SearchQueryBuilder {
private SearchQueryBuilder() {
}

public static QueryBuilder buildQuery(@Nonnull EntitySpec entitySpec, @Nonnull String query, boolean structured) {
public static QueryBuilder buildQuery(@Nonnull EntitySpec entitySpec, @Nonnull String query, boolean fulltext) {
final QueryBuilder queryBuilder;
if (structured) {
queryBuilder = buildInternalQuery(entitySpec, query, false, true);
} else {
if (fulltext) {
queryBuilder = buildInternalQuery(entitySpec, query, true, false);
} else {
queryBuilder = buildInternalQuery(entitySpec, query, false, true);
}

return QueryBuilders.functionScoreQuery(queryBuilder, buildScoreFunctions(entitySpec))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ public static BoolQueryBuilder getFilterQuery(@Nullable Filter filter) {
* @param filter the search filter
* @param from index to start the search from
* @param size the number of search hits to return
* @param structured Structured or full text search modes
* @param fulltext {@code true} to build a full text query, {@code false} to build a structured query
* @return a valid search request
*/
@Nonnull
@WithSpan
public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter filter,
@Nullable SortCriterion sortCriterion, int from, int size, boolean structured) {
@Nullable SortCriterion sortCriterion, int from, int size, boolean fulltext) {
SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

Expand All @@ -170,7 +170,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi

BoolQueryBuilder filterQuery = getFilterQuery(filter);
searchSourceBuilder.query(QueryBuilders.boolQuery()
.must(getQuery(input, structured))
.must(getQuery(input, fulltext))
.must(filterQuery));
getAggregations().forEach(searchSourceBuilder::aggregation);
searchSourceBuilder.highlighter(getHighlights());
Expand Down Expand Up @@ -228,8 +228,8 @@ public static SearchRequest getAggregationRequest(@Nonnull String field, @Nullab
return searchRequest;
}

private QueryBuilder getQuery(@Nonnull String query, boolean structured) {
return SearchQueryBuilder.buildQuery(_entitySpec, query, structured);
private QueryBuilder getQuery(@Nonnull String query, boolean fulltext) {
return SearchQueryBuilder.buildQuery(_entitySpec, query, fulltext);
}

private List<AggregationBuilder> getAggregations() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.linkedin.datahub.graphql.resolvers.ResolverUtils;
import com.linkedin.datahub.graphql.types.SearchableEntityType;
import com.linkedin.metadata.graph.LineageDirection;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.search.LineageSearchResult;
import com.linkedin.metadata.search.LineageSearchService;
import com.linkedin.metadata.search.SearchResult;
Expand Down Expand Up @@ -68,7 +69,12 @@ private ESTestUtils() {

public static SearchResult search(SearchService searchService, String query) {
return searchService.searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, null, 0,
100, null);
100, new SearchFlags().setFulltext(true));
}

/**
 * Searches across {@code SEARCHABLE_ENTITIES} with the fulltext search flag explicitly set to {@code false}.
 * No filter or sort criterion is passed, and the request starts at offset 0 with a size of 100.
 *
 * @param searchService the service whose {@code searchAcrossEntities} method is invoked
 * @param query the search input
 * @return the result returned by {@code searchAcrossEntities}
 */
public static SearchResult searchStructured(SearchService searchService, String query) {
return searchService.searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, null, 0,
100, new SearchFlags().setFulltext(false));
}

public static LineageSearchResult lineage(LineageSearchService lineageSearchService, Urn root, int hops) {
Expand Down

0 comments on commit 656ceb6

Please sign in to comment.