Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,8 @@
*/
package org.apache.polaris.persistence.relational.jdbc;

import jakarta.annotation.Nonnull;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import org.apache.polaris.core.persistence.bootstrap.SchemaOptions;

public enum DatabaseType {
POSTGRES("postgres"),
Expand Down Expand Up @@ -52,26 +48,16 @@ public static DatabaseType fromDisplayName(String displayName) {
* Open an InputStream that contains data from an init script. This stream should be closed by the
* caller.
*/
public InputStream openInitScriptResource(@Nonnull SchemaOptions schemaOptions) {
if (schemaOptions.schemaFile() != null) {
try {
return new FileInputStream(schemaOptions.schemaFile());
} catch (IOException e) {
throw new IllegalArgumentException("Unable to load file " + schemaOptions.schemaFile(), e);
}
} else {
final String schemaSuffix;
switch (schemaOptions.schemaVersion()) {
case null -> schemaSuffix = "schema-v3.sql";
case 1 -> schemaSuffix = "schema-v1.sql";
case 2 -> schemaSuffix = "schema-v2.sql";
case 3 -> schemaSuffix = "schema-v3.sql";
default ->
throw new IllegalArgumentException(
"Unknown schema version " + schemaOptions.schemaVersion());
}
ClassLoader classLoader = DatasourceOperations.class.getClassLoader();
return classLoader.getResourceAsStream(this.getDisplayName() + "/" + schemaSuffix);
public InputStream openInitScriptResource(int schemaVersion) {
// Preconditions check is simpler and more direct than a switch default
if (schemaVersion <= 0 || schemaVersion > 3) {
throw new IllegalArgumentException("Unknown or invalid schema version " + schemaVersion);
}

final String resourceName =
String.format("%s/schema-v%d.sql", this.getDisplayName(), schemaVersion);

ClassLoader classLoader = DatasourceOperations.class.getClassLoader();
return classLoader.getResourceAsStream(resourceName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,13 @@ public class DatasourceOperations {

private static final Logger LOGGER = LoggerFactory.getLogger(DatasourceOperations.class);

// PG STATUS CODES
private static final String CONSTRAINT_VIOLATION_SQL_CODE = "23505";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocker: do we need to handle a different code for H2? It would be nice to do so, but it's not related to this PR.

private static final String RELATION_DOES_NOT_EXIST = "42P01";

// H2 STATUS CODES
private static final String H2_RELATION_DOES_NOT_EXIST = "90079";

// POSTGRES RETRYABLE EXCEPTIONS
private static final String SERIALIZATION_FAILURE_SQL_CODE = "40001";

Expand Down Expand Up @@ -396,7 +400,9 @@ public boolean isConstraintViolation(SQLException e) {
}

public boolean isRelationDoesNotExist(SQLException e) {
return RELATION_DOES_NOT_EXIST.equals(e.getSQLState());
return (RELATION_DOES_NOT_EXIST.equals(e.getSQLState())
&& databaseType == DatabaseType.POSTGRES)
|| (H2_RELATION_DOES_NOT_EXIST.equals(e.getSQLState()) && databaseType == DatabaseType.H2);
}

private Connection borrowConnection() throws SQLException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -748,14 +748,28 @@ static int loadSchemaVersion(
}
return schemaVersion.getFirst().getValue();
} catch (SQLException e) {
LOGGER.error("Failed to load schema version due to {}", e.getMessage(), e);
if (fallbackOnDoesNotExist && datasourceOperations.isRelationDoesNotExist(e)) {
return SchemaVersion.MINIMUM.getValue();
}
LOGGER.error("Failed to load schema version due to {}", e.getMessage(), e);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: Do we need this error message given that we throw right after it?

throw new IllegalStateException("Failed to retrieve schema version", e);
}
}

static boolean entityTableExists(DatasourceOperations datasourceOperations) {
PreparedQuery query = QueryGenerator.generateEntityTableExistQuery();
try {
List<PolarisBaseEntity> entities =
datasourceOperations.executeSelect(query, new ModelEntity());
return entities != null && !entities.isEmpty();
} catch (SQLException e) {
if (datasourceOperations.isRelationDoesNotExist(e)) {
return false;
}
throw new IllegalStateException("Failed to check if Entities table exists", e);
}
}

/** {@inheritDoc} */
@Override
public <T extends PolarisEntity & LocationBasedEntity>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.polaris.persistence.relational.jdbc;

import java.util.Optional;
import org.apache.polaris.core.persistence.bootstrap.BootstrapOptions;
import org.apache.polaris.core.persistence.bootstrap.SchemaOptions;

public class JdbcBootstrapUtils {

private JdbcBootstrapUtils() {}

/**
* Determines the correct schema version to use for bootstrapping a realm.
*
* @param currentSchemaVersion The current version of the database schema.
* @param requiredSchemaVersion The requested schema version (-1 for auto-detection).
* @param hasAlreadyBootstrappedRealms Flag indicating if any realms already exist.
* @return The calculated bootstrap schema version.
* @throws IllegalStateException if the combination of parameters represents an invalid state.
*/
public static int getRealmBootstrapSchemaVersion(
int currentSchemaVersion, int requiredSchemaVersion, boolean hasAlreadyBootstrappedRealms) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having the hasAlreadyBootstrappedRealms parameter make the logic in this method's body hard to follow as it depends on external factors... Can we fold hasAlreadyBootstrappedRealms into this method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, the feedback is to infer hasAlreadyBootstrappedRealms inside function, though this would mean passing datasourceOperation here and making the API call to infer the same, and the if else still remain same,
am i missing something ?
The present one make testing easy and the same time keeps the logic to infer if the any of the realms bootstraps in JdbcMetastoreManagerFactory hence i wrote it this way, please let me know your thoughts considering above.

Copy link
Contributor

@dimas-b dimas-b Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's more about overall confusion 😅

We're trying to figure out what version of the DDL script to run when bootstrap is called.

We check the version table and the existence of some other table and the bootstrap options.

However, from my POV, the big question is whether to run the DDL at all.

If tables exist and already contain realm A in schema X, then someone bootstraps realm B in schema Y, why would we (automatically) run DDL for Y and affect realm A?

I'd think we should deduce the current schema version (X) and if X != Y error out (or require a new "upgrade" flag in SchemaOptions).

Copy link
Contributor Author

@singhpk234 singhpk234 Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If tables exist and already contain realm A in schema X, then someone bootstraps realm B in schema Y, why would we (automatically) run DDL for Y and affect realm A?

This is because schema table is not realm specific :( meaning if i had a realm in version 1 and then i bootstrap with version 2 then i set the schema version globally to 2
specially now we have schemas for example 1.0 which don't have version at all, so we deduce the value as 0 which would mean yes we did bootstrap but this 0 means 1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to have hasAlreadyBootstrappedRealms as an input other than embedded within this method. It make the tests much easier by passing a boolean.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please consider my previous comments non-blocking


// If versions already match, no change is needed.
if (currentSchemaVersion == requiredSchemaVersion) {
return requiredSchemaVersion;
}

// Handle fresh installations where no schema version is recorded (version 0).
if (currentSchemaVersion == 0) {
if (hasAlreadyBootstrappedRealms) {
// System was bootstrapped with v1 before schema versioning was introduced.
if (requiredSchemaVersion == -1 || requiredSchemaVersion == 1) {
return 1;
}
} else {
// A truly fresh start. Default to v3 for auto-detection, otherwise use the specified
// version.
return requiredSchemaVersion == -1 ? 3 : requiredSchemaVersion;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, we could improve it by having a variable like latestSchemaVersion, so that we don't have to change this method every time we update the schema version.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, let me add an ENUM in a follow-up pr

}
}

// Handle auto-detection on an existing installation (current version > 0).
if (requiredSchemaVersion == -1) {
// Use the current version if realms already exist; otherwise, use v3 for the new realm.
return hasAlreadyBootstrappedRealms ? currentSchemaVersion : 3;
}

// Any other combination is an unhandled or invalid migration path.
throw new IllegalStateException(
String.format(
"Cannot determine bootstrap schema version. Current: %d, Required: %d, Bootstrapped: %b",
currentSchemaVersion, requiredSchemaVersion, hasAlreadyBootstrappedRealms));
}

/**
* Extracts the requested schema version from the provided BootstrapOptions.
*
* @param bootstrapOptions: The bootstrap options containing schema information from which to
* extract the version.
* @return The requested schema version, or -1 if not specified.
*/
public static int getRequestedSchemaVersion(BootstrapOptions bootstrapOptions) {
SchemaOptions schemaOptions = bootstrapOptions.schemaOptions();
if (schemaOptions != null) {
Optional<Integer> version = schemaOptions.schemaVersion();
if (version.isPresent()) {
return version.get();
}
}
return -1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,27 @@ public synchronized Map<String, PrincipalSecretsResult> bootstrapRealms(
RealmContext realmContext = () -> realm;
if (!metaStoreManagerMap.containsKey(realm)) {
DatasourceOperations datasourceOperations = getDatasourceOperations();
int currentSchemaVersion =
JdbcBasePersistenceImpl.loadSchemaVersion(
datasourceOperations,
configurationStore.getConfiguration(
realmContext, BehaviorChangeConfiguration.SCHEMA_VERSION_FALL_BACK_ON_DNE));
int requestedSchemaVersion = JdbcBootstrapUtils.getRequestedSchemaVersion(bootstrapOptions);
int effectiveSchemaVersion =
JdbcBootstrapUtils.getRealmBootstrapSchemaVersion(
currentSchemaVersion,
requestedSchemaVersion,
JdbcBasePersistenceImpl.entityTableExists(datasourceOperations));
LOGGER.info(
"Effective schema version: {} for bootstrapping realm: {}",
effectiveSchemaVersion,
realm);
try {
// Run the set-up script to create the tables.
datasourceOperations.executeScript(
datasourceOperations
.getDatabaseType()
.openInitScriptResource(bootstrapOptions.schemaOptions()));
.openInitScriptResource(effectiveSchemaVersion));
} catch (SQLException e) {
throw new RuntimeException(
String.format("Error executing sql script: %s", e.getMessage()), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,14 @@ static PreparedQuery generateVersionQuery() {
return new PreparedQuery("SELECT version_value FROM POLARIS_SCHEMA.VERSION", List.of());
}

@VisibleForTesting
static PreparedQuery generateEntityTableExistQuery() {
return new PreparedQuery(
String.format(
"SELECT * FROM %s LIMIT 1", getFullyQualifiedTableName(ModelEntity.TABLE_NAME)),
List.of());
}

/**
* Generate a SELECT query to find any entities that have a given realm &amp; parent and that may
* overlap with a given location. The check is performed without consideration for the scheme, so
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ CREATE TABLE IF NOT EXISTS version (

MERGE INTO version (version_key, version_value)
KEY (version_key)
VALUES ('version', 2);
VALUES ('version', 3);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spent last night chasing test failures due to this :'(

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a copy-paste error. I guess we could add a test to validate it. We could probably do that in another PR.


-- H2 supports COMMENT, but some modes may ignore it
COMMENT ON TABLE version IS 'the version of the JDBC schema in use';
Expand Down
Loading