Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-4952 - Introduce FedXConfig overrides for FedXRepositoryConfig #4953

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.federated.repository;

import static org.eclipse.rdf4j.federated.repository.FedXRepositoryConfig.NAMESPACE;

import org.eclipse.rdf4j.federated.FedXConfig;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.repository.config.RepositoryConfigException;

/**
* A parser & exporter of {@link FedXConfig} to fine-tune FedX repositories when configured via
* {@link FedXRepositoryConfig}.
*
* @author Iotic Labs
*/
public class FedXConfigParser {

private static final ValueFactory vf = SimpleValueFactory.getInstance();

/**
* IRI of the property populating {@link FedXConfig#getEnforceMaxQueryTime()}
*/
public static final IRI CONFIG_ENFORCE_MAX_QUERY_TIME = vf.createIRI(NAMESPACE, "enforceMaxQueryTime");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

f.y.i: nowadays in RDF4J instead of ValueFactory one could simply use Values.iri(NAMESPACE, "enforceMaxQueryTime")

For me OK to stick with the ValueFactory

Copy link
Contributor Author

@vtermanis vtermanis Apr 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know 👍

Happy to update or, based on your comment, do you prefer it "looks" the same as FedXRepositoryConfig does?

Edit: Right - with the other change this will be N/A since they'll all use the ValueFactory approach.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


/**
* IRI of the property populating {@link FedXConfig#isEnableMonitoring()}
*/
public static final IRI CONFIG_ENABLE_MONITORING = vf.createIRI(NAMESPACE, "enableMonitoring");

/**
* IRI of the property populating {@link FedXConfig#isLogQueryPlan()}
*/
public static final IRI CONFIG_LOG_QUERY_PLAN = vf.createIRI(NAMESPACE, "logQueryPlan");

/**
* IRI of the property populating {@link FedXConfig#isDebugQueryPlan()}
*/
public static final IRI CONFIG_DEBUG_QUERY_PLAN = vf.createIRI(NAMESPACE, "debugQueryPlan");

/**
* IRI of the property populating {@link FedXConfig#isLogQueries()}
*/
public static final IRI CONFIG_LOG_QUERIES = vf.createIRI(NAMESPACE, "logQueries");

/**
* IRI of the property populating {@link FedXConfig#getSourceSelectionCacheSpec()}
*/
public static final IRI CONFIG_SOURCE_SELECTION_CACHE_SPEC = vf.createIRI(NAMESPACE, "sourceSelectionCacheSpec");

private FedXConfigParser() {
}

/**
* Updates the provided {@link FedXConfig} with properties from the supplied model.
*
* @param config the configuration to be amended.
* @param m the model from which to read configuration properties
* @param confNode the subject against which to expect {@link FedXConfig} overrides.
*
* @return The updated {@link FedXConfig}
*
* @throws RepositoryConfigException if any of the overridden fields are deemed to be invalid
*/
public static FedXConfig parse(FedXConfig config, Model m, Resource confNode) throws RepositoryConfigException {
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENFORCE_MAX_QUERY_TIME, null))
.ifPresent(value -> config.withEnforceMaxQueryTime(value.intValue()));

Models.objectLiteral(m.getStatements(confNode, CONFIG_ENABLE_MONITORING, null))
.ifPresent(value -> config.withEnableMonitoring(value.booleanValue()));

Models.objectLiteral(m.getStatements(confNode, CONFIG_LOG_QUERY_PLAN, null))
.ifPresent(value -> config.withLogQueryPlan(value.booleanValue()));

Models.objectLiteral(m.getStatements(confNode, CONFIG_DEBUG_QUERY_PLAN, null))
.ifPresent(value -> config.withDebugQueryPlan(value.booleanValue()));

Models.objectLiteral(m.getStatements(confNode, CONFIG_LOG_QUERIES, null))
.ifPresent(value -> config.withLogQueries(value.booleanValue()));

Models.objectLiteral(m.getStatements(confNode, CONFIG_SOURCE_SELECTION_CACHE_SPEC, null))
.ifPresent(value -> config.withSourceSelectionCacheSpec(value.stringValue()));

return config;
}

/**
* Export the provided {@link FedXConfig} to its RDF representation.
*
* @param config the configuration to export
* @param m the model to which to write configuration properties
*
* @return the node against which the configuration has been written
*/
public static Resource export(FedXConfig config, Model m) {
BNode confNode = Values.bnode();

m.add(confNode, CONFIG_ENFORCE_MAX_QUERY_TIME, vf.createLiteral(config.getEnforceMaxQueryTime()));

m.add(confNode, CONFIG_ENABLE_MONITORING, vf.createLiteral(config.isEnableMonitoring()));

m.add(confNode, CONFIG_LOG_QUERY_PLAN, vf.createLiteral(config.isLogQueryPlan()));

m.add(confNode, CONFIG_DEBUG_QUERY_PLAN, vf.createLiteral(config.isDebugQueryPlan()));

m.add(confNode, CONFIG_LOG_QUERIES, vf.createLiteral(config.isLogQueries()));

if (config.getSourceSelectionCacheSpec() != null) {
m.add(confNode, CONFIG_SOURCE_SELECTION_CACHE_SPEC, vf.createLiteral(config.getSourceSelectionCacheSpec()));
}

return confNode;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@
* # optionally define data config
* #fedx:fedxConfig "fedxConfig.prop" ;
* fedx:dataConfig "dataConfig.ttl" ;
*
* # optionally define FedXConfig overrides
* fedx:config [
* fedx:sourceSelectionCacheSpec "maximumSize=0" ;
* fedx:enforceMaxQueryTime 30 ;
* ]
* ];
* rep:repositoryID "fedx" ;
* rdfs:label "FedX Federation" .
Expand Down Expand Up @@ -87,6 +93,11 @@ public class FedXRepositoryConfig extends AbstractRepositoryImplConfig {
*/
public static final IRI DATA_CONFIG = vf.createIRI(NAMESPACE, "dataConfig");

/**
* IRI of the property pointing to the {@link FedXConfig}
*/
public static final IRI FEDX_CONFIG = vf.createIRI(NAMESPACE, "config");

/**
* IRI of the property pointing to a federation member node
*/
Expand Down Expand Up @@ -152,6 +163,11 @@ public Resource export(Model m) {
m.add(implNode, DATA_CONFIG, vf.createLiteral(getDataConfig()));
}

if (getConfig() != null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will mean: any externally set FedXConfig (e.g. via code) will be serialized into the repository configuration, e.g. when saving it.

I think overall (from history) there is a bit of technical design issue in the FedXRepositoryConfig and FedXConfig: the FedXConfig mixes "string" like options and things that can only be set via code (e.g. the TaskWrapper).

For our product use-case: we are having an extension of FedXConfig (with some proprietary options, and also a TaskWrapper) and we are setting this config via code. We generally would not want to see it exported to the repository config.

So maybe to retain compatibility on that level, we can use one of the following options:

a) introduce another boolean field (say isExternalConfig) and only export if it is not externally provided
b) put the handling of the FedXConfig for both parsing and exporting to a protected method, which can be overridden in customizations of the repository config

I think option b) is cleaner an will allow most flexibility and still retain compatibility.

So the idea would be to move your new exporting/parsing part of the code from this class to new protected methods

  • protected void exportFedXConfig(Model model, Resource implNode)
  • protected void parseFedXConfig(Model model, Resource implNode)

and call those in the current places.

Does this make sense to you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - that sounds good. I'll have a go.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought - the proposed method signatures allow sub-classes to do whatever they want (be it config-related or not). Would it not make more sense to pass them the node which contains the config (like is done in the separate class) - separation of concerns and all that.
And e.g. allow new FedXConfig(ConfigReader) to inject a custom or sub-classed implementation?

(But I also appreciate that this probably doesn't need to be over-engineered.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resource confNode = FedXConfigParser.export(getConfig(), m);
m.add(implNode, FEDX_CONFIG, confNode);
}

if (getMembers() != null) {

Model members = getMembers();
Expand Down Expand Up @@ -187,6 +203,14 @@ public void parse(Model m, Resource implNode) throws RepositoryConfigException {
Models.objectLiteral(m.getStatements(implNode, DATA_CONFIG, null))
.ifPresent(value -> setDataConfig(value.stringValue()));

Models.objectResource(m.getStatements(implNode, FEDX_CONFIG, null))
.ifPresent(res -> {
if (getConfig() == null) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: I chose to do this so the previous behaviour is preserved:

  1. If no FedXConfig has been provided (programmatically or via template) then it will be null
  2. When exporting, FedXConfig is not included (when null) rather than being expanded into defaults (see export above)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good considerations! Please see my comments for the exporting

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed with the other suggestion: 5a160df

setConfig(new FedXConfig());
}
setConfig(FedXConfigParser.parse(getConfig(), m, res));
});

Set<Value> memberNodes = m.filter(implNode, MEMBER, null).objects();
if (!memberNodes.isEmpty()) {
Model members = new TreeModel();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.federated.repository;

import static org.assertj.core.api.Assertions.assertThat;

import java.io.InputStream;

import org.eclipse.rdf4j.federated.FedXConfig;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.impl.TreeModel;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;
import org.junit.jupiter.api.Test;

public class FedXConfigParserTest {

@Test
public void testParse() throws Exception {
Model model = readConfig("/tests/rdf4jserver/config-fedXConfig-only.ttl");

FedXConfig config = FedXConfigParser.parse(new FedXConfig(), model, Values.iri("http://example.org/conf"));

assertThat(config.getEnforceMaxQueryTime()).isEqualTo(1234);
assertThat(config.isEnableMonitoring()).isTrue();
assertThat(config.isLogQueryPlan()).isTrue();
assertThat(config.isDebugQueryPlan()).isTrue();
assertThat(config.isLogQueries()).isTrue();
assertThat(config.getSourceSelectionCacheSpec()).isEqualTo("spec-goes-here");
}

@Test
public void testParseWithEmptyConfig() throws Exception {
Model model = new TreeModel();

FedXConfig config = FedXConfigParser.parse(new FedXConfig(), model, Values.iri("http://example.org/conf"));

// expecting defaults
assertThat(config.getEnforceMaxQueryTime()).isEqualTo(30);
assertThat(config.isEnableMonitoring()).isFalse();
assertThat(config.isLogQueryPlan()).isFalse();
assertThat(config.isDebugQueryPlan()).isFalse();
assertThat(config.isLogQueries()).isFalse();
assertThat(config.getSourceSelectionCacheSpec()).isNull();
}

@Test
public void testExport() throws Exception {
Model model = readConfig("/tests/rdf4jserver/config-fedXConfig-only.ttl");

FedXConfig config = FedXConfigParser.parse(new FedXConfig(), model, Values.iri("http://example.org/conf"));

Model export = new TreeModel();
Resource configNode = FedXConfigParser.export(config, export);

assertThat(export.filter(configNode, null, null)).hasSize(6);

assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_ENFORCE_MAX_QUERY_TIME, null)))
.hasValueSatisfying(v -> assertThat(v.intValue()).isEqualTo(1234));
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_ENABLE_MONITORING, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isTrue());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_LOG_QUERY_PLAN, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isTrue());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_DEBUG_QUERY_PLAN, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isTrue());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_LOG_QUERIES, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isTrue());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_SOURCE_SELECTION_CACHE_SPEC, null)))
.hasValueSatisfying(v -> assertThat(v.stringValue()).isEqualTo("spec-goes-here"));
}

@Test
public void testExportWithEmptyConfig() throws Exception {
Model export = new TreeModel();
Resource configNode = FedXConfigParser.export(new FedXConfig(), export);

// Note: 5 instead of 6 since CONFIG_SOURCE_SELECTION_CACHE_SPEC is null and thus should not be populated
assertThat(export.filter(configNode, null, null)).hasSize(5);

assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_ENFORCE_MAX_QUERY_TIME, null)))
.hasValueSatisfying(v -> assertThat(v.intValue()).isEqualTo(30));
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_ENABLE_MONITORING, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isFalse());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_LOG_QUERY_PLAN, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isFalse());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_DEBUG_QUERY_PLAN, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isFalse());
assertThat(
Models.objectLiteral(
export.getStatements(configNode, FedXConfigParser.CONFIG_LOG_QUERIES, null)))
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isFalse());
}

protected Model readConfig(String configResource) throws Exception {
try (InputStream in = FedXRepositoryConfigTest.class.getResourceAsStream(configResource)) {
return Rio.parse(in, "http://example.org/", RDFFormat.TURTLE);
}
}
}
Loading
Loading