Skip to content

Commit

Permalink
1. Corrected issues based on review. 2. Added documentation on this f…
Browse files Browse the repository at this point in the history
…eature.
  • Loading branch information
an1310 committed Jan 23, 2024
1 parent 23981a2 commit 1a74151
Show file tree
Hide file tree
Showing 12 changed files with 84 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,41 @@ entities (things/policies) and no-one other:

These system properties would have to be configured for the "things" and "policies" services.

## Limiting Indexed Fields

The default behavior of Ditto is to index the complete JSON of a thing, which includes all its attributes and features. This may not be desired behavior for certain use cases:
* Increased load on the search database, leading to performance degradation and increased database cost.
* Only a few fields are ever used for searching.

In Ditto *3.5.0*, there is now configuration to specify, by namespace, which fields will be included in the search database.

To enable this functionality, there are two new options in the `thing-search.conf` configuration:

```
ditto {
...
caching-signal-enrichment-facade-provider = org.eclipse.ditto.thingsearch.service.persistence.write.streaming.SearchIndexingSignalEnrichmentFacadeProvider
...
search {
namespace-search-include-fields = [
{
namespace = "org.eclipse",
search-include-fields = [ "attributes", "features/info" ]
},
{
namespace = "org.eclipse.test",
search-include-fields = [ "attributes", "features/info/properties/", "features/info/other" ]
}
]
}
```

There is a new implementation of the caching signal enrichment facade provider that must be configured to enable this functionality.

For each namespace, only the selected fields are included in the search database. In the example above, for things in the "org.eclipse" namespace, only the "attributes" and "features/info" paths will be the only fields indexed in the search database. For things in the "org.eclipse.test" namespace, the fields indexed in the search database will only be "attributes", "features/info/properties", and "features/info/other".

NOTE: Ditto will automatically add the system-level fields it needs to operate, so no manual configuration of these is necessary.

## Logging

Gathering logs for a running Ditto installation can be achieved by:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,17 @@ public CompletionStage<JsonObject> retrieveThing(final ThingId thingId, final Li
final long minAcceptableSeqNr) {

final DittoHeaders dittoHeaders = DittoHeaders.empty();

JsonFieldSelector fieldSelector = determineSelector(thingId.getNamespace());

if (minAcceptableSeqNr < 0) {
final var cacheKey =
SignalEnrichmentCacheKey.of(thingId, SignalEnrichmentContext.of(dittoHeaders, null));
SignalEnrichmentCacheKey.of(thingId, SignalEnrichmentContext.of(dittoHeaders, fieldSelector));
extraFieldsCache.invalidate(cacheKey);
return doCacheLookup(cacheKey, dittoHeaders);
} else {
final var cachingParameters =
new CachingParameters(null, events, false, minAcceptableSeqNr);
new CachingParameters(fieldSelector, events, false, minAcceptableSeqNr);

return doRetrievePartialThing(thingId, dittoHeaders, cachingParameters);
}
Expand Down Expand Up @@ -446,6 +449,12 @@ private JsonObject enhanceJsonObject(final JsonObject jsonObject, final List<Thi
return applyJsonFieldSelector(jsonObjectBuilder.build(), enhancedFieldSelector);
}

@Nullable
protected JsonFieldSelector determineSelector(String namespace) {
// By default, we do not return a field selector.
return null;
}

protected static final class CachingParameters {

@Nullable private final JsonFieldSelector fieldSelector;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,23 @@
import org.eclipse.ditto.things.model.ThingId;
import org.eclipse.ditto.things.model.signals.events.ThingEvent;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.Executor;

import static org.eclipse.ditto.base.model.common.ConditionChecker.checkNotNull;

public class SearchIndexingSignalEnrichmentFacade extends DittoCachingSignalEnrichmentFacade {
/**
* Extension of {@code DittoCachingSignalEnrichmentFacade} that allows a selected map of selected indexes grouped by
* namespace to be added to the signal enrichment cache.
*/
public final class SearchIndexingSignalEnrichmentFacade extends DittoCachingSignalEnrichmentFacade {

private final Map<String, JsonFieldSelector> selectedIndexes;

protected SearchIndexingSignalEnrichmentFacade(
private SearchIndexingSignalEnrichmentFacade(
final Map<String, JsonFieldSelector> selectedIndexes,
final SignalEnrichmentFacade cacheLoaderFacade,
final CacheConfig cacheConfig,
Expand All @@ -40,7 +45,7 @@ protected SearchIndexingSignalEnrichmentFacade(

super(cacheLoaderFacade, cacheConfig, cacheLoaderExecutor, cacheNamePrefix);

this.selectedIndexes = selectedIndexes;
this.selectedIndexes = Collections.unmodifiableMap(selectedIndexes);
}

/**
Expand Down Expand Up @@ -69,24 +74,7 @@ public static SearchIndexingSignalEnrichmentFacade newInstance(
}

@Override
public CompletionStage<JsonObject> retrieveThing(final ThingId thingId, final List<ThingEvent<?>> events, final long minAcceptableSeqNr) {

final DittoHeaders dittoHeaders = DittoHeaders.empty();

// Retrieve any namespace definition from the configuration. Note that this might return null.
JsonFieldSelector selector = selectedIndexes.get(thingId.getNamespace());

if (minAcceptableSeqNr < 0) {

final var cacheKey =
SignalEnrichmentCacheKey.of(thingId, SignalEnrichmentContext.of(dittoHeaders, selector));
extraFieldsCache.invalidate(cacheKey);
return doCacheLookup(cacheKey, dittoHeaders);
} else {
final var cachingParameters =
new CachingParameters(selector, events, false, minAcceptableSeqNr);

return doRetrievePartialThing(thingId, dittoHeaders, cachingParameters);
}
protected JsonFieldSelector determineSelector(String namespace) {
return selectedIndexes.get(namespace);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
* Implementation for a {@link CacheKey} used in scope of signal enrichment.
*/
@Immutable
public final class SignalEnrichmentCacheKey implements CacheKey<SignalEnrichmentContext> {
final class SignalEnrichmentCacheKey implements CacheKey<SignalEnrichmentContext> {

static final String DELIMITER = ":";

Expand All @@ -47,8 +47,7 @@ private SignalEnrichmentCacheKey(final EntityId id, @Nullable final SignalEnrich
* @return the entity ID with resource type object.
* @throws NullPointerException if {@code id} is {@code null}.
*/
public static SignalEnrichmentCacheKey of(final EntityId id,
@Nullable final SignalEnrichmentContext cacheLookupContext) {
static SignalEnrichmentCacheKey of(final EntityId id, @Nullable final SignalEnrichmentContext cacheLookupContext) {
return new SignalEnrichmentCacheKey(id, cacheLookupContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
* signal enrichment caching.
*/
@Immutable
public final class SignalEnrichmentContext implements CacheLookupContext {
final class SignalEnrichmentContext implements CacheLookupContext {

private final DittoHeaders dittoHeaders;
@Nullable private final JsonFieldSelector jsonFieldSelector;
Expand All @@ -48,8 +48,8 @@ private SignalEnrichmentContext(final DittoHeaders dittoHeaders,
* @param jsonFieldSelector the JsonFieldSelector to use in the cache lookup context.
* @return the created context.
*/
public static SignalEnrichmentContext of(final DittoHeaders dittoHeaders,
@Nullable final JsonFieldSelector jsonFieldSelector) {
static SignalEnrichmentContext of(final DittoHeaders dittoHeaders,
@Nullable final JsonFieldSelector jsonFieldSelector) {

return new SignalEnrichmentContext(dittoHeaders, jsonFieldSelector);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
*/
public final class DefaultNamespaceSearchIndexConfig implements NamespaceSearchIndexConfig {

private static final String CONFIG_PATH = "namespace-search-include-fields";

private final String namespace;

private final List<String> searchIncludeFields;
Expand All @@ -57,7 +55,6 @@ private DefaultNamespaceSearchIndexConfig(final String namespace, final Collecti
/**
* Returns an instance of {@code DefaultNamespaceSearchIndexConfig} based on the settings of the specified Config.
*
* @param namespace the namespace config passed in the {@code config}.
* @param config is supposed to provide the config for the issuer at its current level.
* @return the instance.
*/
Expand All @@ -66,25 +63,6 @@ public static DefaultNamespaceSearchIndexConfig of(final Config config) {
ConfigWithFallback.newInstance(config, NamespaceSearchIndexConfigValue.values()));
}

/**
* Returns a new SubjectIssuerConfig based on the provided strings.
*
* @param namespace the list of the namespace {@code namespace}.
* @param fields list of search index strings.
* @return a new DefaultNamespaceSearchIndexConfig.
* @throws NullPointerException if {@code namespace} or {@code fields} is {@code null}.
* @throws IllegalArgumentException if {@code namespace} or {@code fields} is empty.
*/
public static DefaultNamespaceSearchIndexConfig of(
final String namespace,
final Collection<String> fields) {
checkNotNull(namespace, "namespace");
checkNotEmpty(namespace, "namespace");
argumentNotEmpty(fields, "fields");

return new DefaultNamespaceSearchIndexConfig(namespace, fields);
}

@Override
public String getNamespace() {
return namespace;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

/**
* Provides configuration settings of the namespace-scoped search indexes.
* @since 3.5.0
*/
@Immutable
public interface NamespaceSearchIndexConfig {
Expand All @@ -29,15 +30,13 @@ public interface NamespaceSearchIndexConfig {
* Returns the namespace definition.
*
* @return the namespace definition
* @since 3.4
*/
String getNamespace();

/**
* Returns a list of fields that will be explicitly included in the search index.
*
* @return the search projection fields.
* @since 3.4
*/
List<String> getSearchIncludeFields();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public interface SearchConfig extends ServiceSpecificConfig, WithHealthCheckConf
* Returns a map of fields scoped by namespaces that will be explicitly included in the search index.
*
* @return the search projection fields.
* @since 3.4
* @since 3.5.0
*/
List<NamespaceSearchIndexConfig> getNamespaceSearchIncludeFields();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@

import org.apache.pekko.actor.ActorSystem;
import org.eclipse.ditto.internal.utils.config.DefaultScopedConfig;
import org.eclipse.ditto.json.JsonField;
import org.eclipse.ditto.json.JsonFieldDefinition;
import org.eclipse.ditto.json.JsonFieldSelector;
import org.eclipse.ditto.json.JsonPointer;
import org.eclipse.ditto.json.*;
import org.eclipse.ditto.things.model.Thing;

import org.eclipse.ditto.thingsearch.service.common.config.DittoSearchConfig;
Expand Down Expand Up @@ -68,30 +65,19 @@ public CachingSignalEnrichmentFacade getSignalEnrichmentFacade(
DittoSearchConfig.of(DefaultScopedConfig.dittoScoped(actorSystem.settings().config()));

// Build a map of field selectors for the enrichment facade to use to quickly look up by Thing namespace.
Map<String, JsonFieldSelector> namespaceToFieldSelector = new HashMap<>();
final Map<String, JsonFieldSelector> namespaceToFieldSelector = new HashMap<>();

for (NamespaceSearchIndexConfig namespaceConfig : searchConfig.getNamespaceSearchIncludeFields()) {
for (final NamespaceSearchIndexConfig namespaceConfig : searchConfig.getNamespaceSearchIncludeFields()) {

if (!namespaceConfig.getSearchIncludeFields().isEmpty()) {

List<String> searchIncludeFields = namespaceConfig.getSearchIncludeFields();

// Ensure the list has the required fields needed for the search to work.
Set<String> set = new HashSet<>(searchIncludeFields);
final Set<String> set = new HashSet<>(namespaceConfig.getSearchIncludeFields());
set.addAll(REQUIRED_INDEXED_FIELDS.stream().map(JsonFieldDefinition::getPointer).map(JsonPointer::toString).toList());

searchIncludeFields = new ArrayList<>(set);

// Extract the first element
CharSequence pointerString = searchIncludeFields.get(0);

// Prepare the rest of the elements for varargs
CharSequence[] furtherPointerStrings = new CharSequence[searchIncludeFields.size() - 1];
for (int i = 1; i < searchIncludeFields.size(); i++) {
furtherPointerStrings[i - 1] = searchIncludeFields.get(i);
}
final List<String> searchIncludeFields = new ArrayList<>(set);

JsonFieldSelector indexedFields = JsonFieldSelector.newInstance(pointerString, furtherPointerStrings);
JsonFieldSelector indexedFields = JsonFactory.newFieldSelector(searchIncludeFields, JsonParseOptions.newBuilder().build());

namespaceToFieldSelector.put(namespaceConfig.getNamespace(), indexedFields);
}
Expand Down
13 changes: 13 additions & 0 deletions thingsearch/service/src/main/resources/search.conf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ ditto {
search-update-observer = org.eclipse.ditto.thingsearch.service.updater.actors.DefaultSearchUpdateObserver
query-criteria-validator = org.eclipse.ditto.thingsearch.service.persistence.query.validation.DefaultQueryCriteriaValidator
caching-signal-enrichment-facade-provider = org.eclipse.ditto.thingsearch.service.persistence.write.streaming.DittoCachingSignalEnrichmentFacadeProvider
# The below is an alternate implementation of the caching-signal-enrichment-facade-provider that enables
# only indexing selected JSON paths by namespace.
# caching-signal-enrichment-facade-provider = org.eclipse.ditto.thingsearch.service.persistence.write.streaming.SearchIndexingSignalEnrichmentFacadeProvider
search-update-mapper = org.eclipse.ditto.thingsearch.service.persistence.write.streaming.DefaultSearchUpdateMapper
}

Expand Down Expand Up @@ -64,6 +67,16 @@ ditto {
enabled = ${?INDEX_INITIALIZATION_ENABLED}
}

# This configuration is used in conjunction with the new caching-signal-enrichment-facade-provider implementation
# to only index a selected array of JSON pointers scoped by namespace.
namespace-search-include-fields = [
# Example: For the namespace "org.eclipse", only the "attributes" and "features/info" will be indexed in the
# search database.
# {
# namespace = "org.eclipse",
# search-include-fields = [ "attributes", "features/info" ]
# }
]

query {
persistence {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,22 +82,14 @@ public void underTestReturnsValuesOfConfigFile() {
softly.assertThat(first.getSearchIncludeFields())
.as(NamespaceSearchIndexConfig.NamespaceSearchIndexConfigValue.SEARCH_INCLUDE_FIELDS.getConfigPath())
.isEqualTo(
List.of(
"attributes",
"features/info"
)
);
List.of("attributes", "features/info"));

// Second config
softly.assertThat(second.getNamespace()).isEqualTo("org.eclipse.test");

softly.assertThat(second.getSearchIncludeFields())
.as(NamespaceSearchIndexConfig.NamespaceSearchIndexConfigValue.SEARCH_INCLUDE_FIELDS.getConfigPath())
.isEqualTo(
List.of(
"attributes",
"features/info/properties/",
"features/info/other" )
);
List.of("attributes", "features/info/properties/", "features/info/other"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ public void testQueryPersistenceConfig() {
final var config = ConfigFactory.load("search-test.conf");
final var underTest = DittoSearchConfig.of(DefaultScopedConfig.dittoScoped(config));

//assertThat(underTest.getSearchIncludeFields().isEmpty()).isTrue();

final var queryPersistenceConfig = underTest.getQueryPersistenceConfig();
assertThat(queryPersistenceConfig.readConcern()).isEqualTo(ReadConcern.LINEARIZABLE);
assertThat(queryPersistenceConfig.readPreference()).isEqualTo(ReadPreference.NEAREST);
Expand Down

0 comments on commit 1a74151

Please sign in to comment.