From cfa654aa4a4e33e935b9fc7167f0bf410ab2bade Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Fri, 17 Apr 2026 11:38:40 -0600 Subject: [PATCH 1/5] fix(opensearch): make shadow write log level configurable and add fire-and-forget tests (#35302) Introduces DOTCMS_SHADOW_WRITE_LOG_LEVEL (default WARN) so operators can raise shadow OS write failures to ERROR during QA or lower them to DEBUG during steady-state migration, without code changes. Adds PhaseRouterTest covering all four migration phases with the mismatched-index-name scenario. Co-Authored-By: Claude Sonnet 4.6 --- .../business/ContentletIndexAPIImpl.java | 3 +- .../content/index/IndexConfigHelper.java | 30 ++ .../com/dotcms/content/index/PhaseRouter.java | 8 +- .../common/reindex/BulkProcessorListener.java | 8 +- .../resources/dotmarketing-config.properties | 11 + .../dotcms/content/index/PhaseRouterTest.java | 340 ++++++++++++++++++ 6 files changed, 393 insertions(+), 7 deletions(-) create mode 100644 dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 76cd1f190af4..845a68821823 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -4,6 +4,7 @@ import static com.dotcms.content.index.IndexConfigHelper.isMigrationNotStarted; import static com.dotcms.content.index.IndexConfigHelper.isMigrationStarted; import static com.dotcms.content.index.IndexConfigHelper.isReadEnabled; +import static com.dotcms.content.index.IndexConfigHelper.logShadowWriteFailure; import static com.dotmarketing.util.StringUtils.builder; import com.dotcms.api.system.event.message.MessageSeverity; @@ -371,7 +372,7 @@ public void close() throws Exception { } catch (final Exception e) { if (entry.shadow) { // OS shadow write — fire-and-forget: log divergence, do not propagate. - Logger.warnAndDebug(CompositeBulkProcessor.class, + logShadowWriteFailure(CompositeBulkProcessor.class, "OS shadow processor failed to flush on close — ES flush succeeded; " + "OS index may diverge until next reindex. Cause: " + e.getMessage(), e); diff --git a/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java b/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java index 3168dc688ef1..f2fa38f6e214 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java @@ -3,6 +3,7 @@ import com.dotcms.content.index.opensearch.OSIndexProperty; import com.dotcms.featureflag.FeatureFlagName; import com.dotmarketing.util.Config; +import com.dotmarketing.util.Logger; /** * Central helper for reading index-layer configuration properties. @@ -35,6 +36,35 @@ */ public interface IndexConfigHelper { + /** + * Config key controlling the log level for OS shadow write failures in dual-write phases. + * + *

Valid values: {@code DEBUG}, {@code INFO}, {@code WARN}, {@code ERROR} (default: {@code WARN}). + * Set to {@code ERROR} or {@code DEBUG} to increase/decrease visibility during migration QA.

+ */ + String SHADOW_WRITE_LOG_LEVEL_KEY = "DOTCMS_SHADOW_WRITE_LOG_LEVEL"; + + /** + * Logs an OS shadow write failure at the level configured by + * {@value #SHADOW_WRITE_LOG_LEVEL_KEY} (default: {@code WARN}). + * + * @param clazz the class to attribute the log entry to + * @param message the log message + * @param t the throwable, or {@code null} if none + */ + static void logShadowWriteFailure(final Class clazz, + final String message, + final Throwable t) { + final String level = Config.getStringProperty(SHADOW_WRITE_LOG_LEVEL_KEY, "WARN") + .toUpperCase(); + switch (level) { + case "DEBUG": Logger.debug(clazz, message, t); break; + case "INFO": Logger.info(clazz, message); break; + case "ERROR": Logger.error(clazz, message, t); break; + default: Logger.warn(clazz, message, t); break; + } + } + // ------------------------------------------------------------------------- // Migration phase // ------------------------------------------------------------------------- diff --git a/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java b/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java index 905dc4c1f6e3..25298ed286d6 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java @@ -4,6 +4,8 @@ import static com.dotcms.content.index.IndexConfigHelper.isMigrationNotStarted; import static com.dotcms.content.index.IndexConfigHelper.isReadEnabled; +import static com.dotcms.content.index.IndexConfigHelper.logShadowWriteFailure; + import com.dotmarketing.util.Logger; import java.util.List; import java.util.function.Consumer; @@ -246,7 +248,7 @@ public boolean writeBoolean(final Function fn) { if (impl == primary) { primaryEx = e; } else { - Logger.warn(PhaseRouter.class, + logShadowWriteFailure(PhaseRouter.class, "Shadow write failed (fire-and-forget in dual-write phase): " + e.getMessage(), e); } @@ -332,7 +334,7 @@ public void writeChecked(final ThrowingConsumer action) throws Exception { if (impl == primary) { primaryEx = e; // record — shadow must still be called } else { - Logger.warn(PhaseRouter.class, + logShadowWriteFailure(PhaseRouter.class, "Shadow write failed (fire-and-forget in dual-write phase): " + e.getMessage(), e); } @@ -372,7 +374,7 @@ public R writeReturningChecked(final ThrowingFunction fn) throws Excep try { fn.apply(shadow); } catch (Exception e) { - Logger.warn(PhaseRouter.class, + logShadowWriteFailure(PhaseRouter.class, "Shadow write failed (fire-and-forget in dual-write phase): " + e.getMessage(), e); } diff --git a/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java b/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java index 74ab14186e64..c3420f8491b1 100644 --- a/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java +++ b/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java @@ -1,5 +1,7 @@ package com.dotmarketing.common.reindex; +import static com.dotcms.content.index.IndexConfigHelper.logShadowWriteFailure; + import com.dotcms.content.index.IndexConfigHelper; import com.dotcms.content.index.IndexTag; import com.dotcms.content.index.domain.IndexBulkItemResult; @@ -111,8 +113,8 @@ public void afterBulk(final long executionId, final List re // OS shadow — fire-and-forget; log individual failures for observability only results.stream() .filter(IndexBulkItemResult::failed) - .forEach(r -> Logger.warn(this.getClass(), - "[OS] Index failure (fire-and-forget): " + r.failureMessage())); + .forEach(r -> logShadowWriteFailure(this.getClass(), + "[OS] Index failure (fire-and-forget): " + r.failureMessage(), null)); return; } Logger.debug(this.getClass(), "Bulk process completed"); @@ -155,7 +157,7 @@ public void afterBulk(final long executionId, final List re public void afterBulk(final long executionId, final Throwable failure) { final String msg = failure != null ? failure.getMessage() : "(no message)"; if (shadow) { - Logger.warnAndDebug(this.getClass(), + logShadowWriteFailure(this.getClass(), "[OS] Bulk process failed entirely (fire-and-forget): " + msg, failure); return; } diff --git a/dotCMS/src/main/resources/dotmarketing-config.properties b/dotCMS/src/main/resources/dotmarketing-config.properties index 79d8592946e8..b07a2cb5ca15 100644 --- a/dotCMS/src/main/resources/dotmarketing-config.properties +++ b/dotCMS/src/main/resources/dotmarketing-config.properties @@ -922,3 +922,14 @@ telemetry.collection.timeout.seconds=30 # Metrics taking longer than this will be logged as warnings for optimization # Default: 500 milliseconds telemetry.metric.slow.threshold.ms=500 + +## OpenSearch migration — shadow write observability +# Log level for OS shadow write failures during dual-write phases (1 and 2). +# Shadow failures are fire-and-forget: the ES (primary) result is returned to the +# caller regardless of OS outcome. This setting controls how loudly those failures +# are reported. +# Valid values: DEBUG, INFO, WARN, ERROR (default: WARN) +# Set to ERROR to surface shadow failures in dashboards/alerts. +# Set to DEBUG to suppress them during steady-state migration. +#DOTCMS_SHADOW_WRITE_LOG_LEVEL=WARN + diff --git a/dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java b/dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java new file mode 100644 index 000000000000..c4323808cefe --- /dev/null +++ b/dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java @@ -0,0 +1,340 @@ +package com.dotcms.content.index; + +import static com.dotcms.content.index.IndexConfigHelper.MigrationPhase.FLAG_KEY; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.dotmarketing.util.Config; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; +import org.junit.After; +import org.junit.Test; + +/** + * Unit tests for {@link PhaseRouter} fire-and-forget and failure-propagation behaviour. + * + *

Scenario simulated

+ *

In a common dual-write catch-up scenario the ES and OS indices carry different + * timestamp suffixes. For example, ES has {@code working_T0} and OS has {@code working_T1}. + * Calling an index lifecycle operation ({@code closeIndex}, {@code openIndex}, {@code delete}) + * with the ES index name succeeds on ES but throws + * {@code [index_not_found_exception]} on OS, because OS does not know about {@code working_T0}.

+ * + *

The routing table below shows who is primary (failure propagates) and who is + * shadow (failure is fire-and-forget) in each phase:

+ * + *
+ * Phase │ Primary │ Shadow │ ES index = T0, OS index = T1  →  closeIndex("T0")
+ * ──────┼─────────┼────────┼────────────────────────────────────────────────────
+ *   0   │ ES only │  —     │ ES ok                             → success
+ *   1   │ ES      │ OS     │ ES ok, OS throws (T0 not in OS)   → success (fire-and-forget)
+ *   2   │ OS      │ ES     │ OS throws (T0 not in OS, primary) → THROWS
+ *   3   │ OS only │  —     │ OS throws                         → THROWS
+ * 
+ * + *

The inverse scenario (OS has T1, ES has T0, caller uses T1) is also tested for Phase 2 + * to confirm that ES shadow failures are fire-and-forget when OS is the primary reader.

+ */ +public class PhaseRouterTest { + + // ── Test setup / teardown ───────────────────────────────────────────────── + + @After + public void clearPhase() { + Config.setProperty(FLAG_KEY, null); + } + + // ========================================================================= + // write() — void unchecked fan-out + // Used by: IndexAPIImpl.closeIndex(), openIndex(), createAlias() + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). ES has "working_T0"; OS has "working_T1". + * When : closeIndex("working_T0") is fanned out to both providers. + * Then : ES (primary) succeeds; OS (shadow) throws index_not_found — exception is swallowed. + * Both providers were called (shadow is never skipped even on primary success). + */ + @Test + public void test_write_phase1_osShadowIndexNotFound_isSwallowed() { + final AtomicBoolean esCalled = new AtomicBoolean(); + final AtomicBoolean osCalled = new AtomicBoolean(); + + // ES has working_T0 → operation succeeds + final Runnable esAction = () -> esCalled.set(true); + + // OS has working_T1 → operation fails (index_not_found for T0) + final Runnable osAction = () -> { + osCalled.set(true); + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T0] — OS index is working_T1"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(1); + + // must not throw — OS shadow failure is fire-and-forget + router.write(Runnable::run); + + assertTrue("ES (primary) must be called", esCalled.get()); + assertTrue("OS (shadow) must also be called", osCalled.get()); + } + + /** + * Given Scenario: Phase 1. ES (primary) fails. + * When : write() is invoked. + * Then : exception propagates to the caller regardless of OS outcome. + * OS is still called (shadow must not be skipped). + */ + @Test + public void test_write_phase1_esPrimaryFailure_propagates() { + final AtomicBoolean osCalled = new AtomicBoolean(); + + final Runnable esAction = () -> { throw new RuntimeException("ES cluster unavailable"); }; + final Runnable osAction = () -> osCalled.set(true); + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(1); + + assertThrows(RuntimeException.class, () -> router.write(Runnable::run)); + assertTrue("OS (shadow) must be called even when primary fails", osCalled.get()); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). ES has "working_T0"; OS has "working_T1". + * When : closeIndex("working_T0") is fanned out. + * Then : OS is now the primary reader — its failure (index_not_found) must propagate. + */ + @Test + public void test_write_phase2_osPrimaryIndexNotFound_propagates() { + // ES has T0 (it is shadow in Phase 2) → would succeed + final Runnable esAction = () -> {}; + + // OS has T1, not T0 (it is primary in Phase 2) → throws + final Runnable osAction = () -> { + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T0] — OS index is working_T1"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(2); + + assertThrows(RuntimeException.class, () -> router.write(Runnable::run)); + } + + /** + * Given Scenario: Phase 2. Caller uses OS index name "working_T1". ES has "working_T0". + * When : closeIndex("working_T1") is fanned out. + * Then : OS (primary) succeeds; ES (shadow) throws index_not_found for T1 — swallowed. + */ + @Test + public void test_write_phase2_esShadowIndexNotFound_isSwallowed() { + // OS has T1 (primary in Phase 2) → succeeds + final Runnable osAction = () -> {}; + + // ES has T0 (shadow in Phase 2) → T1 not found on ES + final Runnable esAction = () -> { + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T1] — ES index is working_T0"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(2); + + // must not throw — ES shadow failure is fire-and-forget in Phase 2 + router.write(Runnable::run); + } + + /** + * Given Scenario: Phase 3 (OS only). OS does not have the requested index. + * When : write() is invoked. + * Then : exception propagates (single provider, no fire-and-forget). + * ES must not be called. + */ + @Test + public void test_write_phase3_osFailure_propagates_esNeverCalled() { + final Runnable esAction = () -> fail("ES must NOT be called in Phase 3"); + final Runnable osAction = () -> { + throw new RuntimeException("[index_not_found_exception] no such index [working_T0]"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(3); + + assertThrows(RuntimeException.class, () -> router.write(Runnable::run)); + } + + /** + * Given Scenario: Phase 0 (ES only). OS would fail if called. + * When : write() is invoked. + * Then : only ES is called; OS is never reached. + */ + @Test + public void test_write_phase0_osNeverCalled() { + final AtomicBoolean osCalled = new AtomicBoolean(); + final Runnable esAction = () -> {}; + final Runnable osAction = () -> { + osCalled.set(true); + throw new RuntimeException("should not reach OS in Phase 0"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(0); + + router.write(Runnable::run); + assertFalse("OS must NOT be called in Phase 0", osCalled.get()); + } + + // ========================================================================= + // writeBoolean() — boolean fan-out, returns primary result + // Used by: IndexAPIImpl.delete() + // ========================================================================= + + /** + * Given Scenario: Phase 1. ES has "working_T0" (delete returns true); OS has "working_T1". + * When : delete("working_T0") is fanned out. + * Then : OS throws index_not_found — swallowed; primary (ES) result {@code true} returned. + */ + @Test + public void test_writeBoolean_phase1_osShadowIndexNotFound_returnsEsResult() { + // ES.delete("working_T0") = acknowledged (true) + final Supplier esDelete = () -> true; + + // OS.delete("working_T0") = index_not_found + final Supplier osDelete = () -> { + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T0]"); + }; + + final PhaseRouter> router = new PhaseRouter<>(esDelete, osDelete); + setPhase(1); + + final boolean result = router.writeBoolean(Supplier::get); + assertTrue("primary (ES) result must be returned when shadow fails", result); + } + + /** + * Given Scenario: Phase 1. ES.delete = false (e.g. not acknowledged); OS also fails. + * When : delete() is fanned out. + * Then : ES result false is returned (shadow swallowed); false is authoritative. + */ + @Test + public void test_writeBoolean_phase1_esReturnsFalse_shadowIgnored() { + final Supplier esDelete = () -> false; + final Supplier osDelete = () -> { + throw new RuntimeException("[index_not_found_exception] working_T0"); + }; + + final PhaseRouter> router = new PhaseRouter<>(esDelete, osDelete); + setPhase(1); + + final boolean result = router.writeBoolean(Supplier::get); + assertFalse("primary (ES) false result must be preserved", result); + } + + /** + * Given Scenario: Phase 2. OS.delete("working_T0") throws (OS is primary). + * When : delete() is fanned out. + * Then : exception propagates. + */ + @Test + public void test_writeBoolean_phase2_osPrimaryFailure_propagates() { + final Supplier esDelete = () -> true; + final Supplier osDelete = () -> { + throw new RuntimeException("[index_not_found_exception] working_T0"); + }; + + final PhaseRouter> router = new PhaseRouter<>(esDelete, osDelete); + setPhase(2); + + assertThrows(RuntimeException.class, () -> router.writeBoolean(Supplier::get)); + } + + // ========================================================================= + // writeChecked() — checked exception fan-out + // Used by: IndexAPIImpl.clearIndex(), createIndex(), updateReplicas() + // ========================================================================= + + /** + * Given Scenario: Phase 1. OS throws a checked IOException (index_not_found). + * When : writeChecked() is invoked. + * Then : checked exception is swallowed; no exception reaches the caller. + */ + @Test + public void test_writeChecked_phase1_osShadowCheckedFailure_isSwallowed() throws Exception { + final AtomicBoolean esCalled = new AtomicBoolean(); + + final PhaseRouter router = new PhaseRouter<>( + () -> esCalled.set(true), // ES succeeds + () -> { throw new IOException("[index_not_found_exception] working_T0"); } + ); + setPhase(1); + + // must not throw — checked exception from OS shadow is swallowed + router.writeChecked(ThrowingAction::run); + assertTrue("ES (primary) must be called", esCalled.get()); + } + + /** + * Given Scenario: Phase 1. ES (primary) throws a checked IOException. + * When : writeChecked() is invoked. + * Then : checked exception propagates to the caller. + */ + @Test + public void test_writeChecked_phase1_esPrimaryCheckedFailure_propagates() { + final PhaseRouter router = new PhaseRouter<>( + () -> { throw new IOException("ES index update failed"); }, + () -> {} // OS succeeds (shadow) + ); + setPhase(1); + + try { + router.writeChecked(ThrowingAction::run); + fail("expected IOException to propagate"); + } catch (IOException expected) { + assertTrue(expected.getMessage().contains("ES index update failed")); + } catch (Exception unexpected) { + fail("unexpected exception type: " + unexpected); + } + } + + /** + * Given Scenario: Phase 2. OS (primary) throws a checked IOException. + * When : writeChecked() is invoked. + * Then : checked exception propagates. + */ + @Test + public void test_writeChecked_phase2_osPrimaryCheckedFailure_propagates() { + final PhaseRouter router = new PhaseRouter<>( + () -> {}, // ES (shadow in Phase 2) succeeds + () -> { throw new IOException("[index_not_found_exception] working_T0"); } + ); + setPhase(2); + + try { + router.writeChecked(ThrowingAction::run); + fail("expected IOException to propagate"); + } catch (IOException expected) { + // correct + } catch (Exception unexpected) { + fail("unexpected exception type: " + unexpected); + } + } + + // ========================================================================= + // Helpers + // ========================================================================= + + /** Simulates a checked index operation (e.g. clearIndex, createIndex). */ + @FunctionalInterface + interface ThrowingAction { + void run() throws Exception; + } + + private static void setPhase(final int ordinal) { + Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); + } +} From 3be5b9cb701d26b35f04aab2404db1064a855694 Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 20 Apr 2026 13:08:56 -0600 Subject: [PATCH 2/5] test(opensearch): add phase-aware unit and integration tests for ContentletIndexAPIImpl Adds ContentletIndexAPIImplPhaseTest (unit, full mock injection) and ContentletIndexAPIImplMigrationIT (integration, requires two real clusters) covering deactivateIndex, createContentIndex, closeIndex, and activateIndex across all four migration phases. Includes a full-dependency constructor on ContentletIndexAPIImpl for isolated unit testing without APILocator. Registers ContentletIndexAPIImplMigrationIT in OpenSearchUpgradeSuite. Refs: #35302 Co-Authored-By: Claude Sonnet 4.6 --- .../business/ContentletIndexAPIImpl.java | 28 +- .../ContentletIndexAPIImplPhaseTest.java | 636 ++++++++++++++++ .../com/dotcms/OpenSearchUpgradeSuite.java | 4 +- .../ContentletIndexAPIImplMigrationIT.java | 676 ++++++++++++++++++ 4 files changed, 1342 insertions(+), 2 deletions(-) create mode 100644 dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java create mode 100644 dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 845a68821823..74a335d4bead 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -176,7 +176,8 @@ public ContentletIndexAPIImpl() { CDIUtils.getBeanThrows(ContentletIndexOperationsOS.class)); } - /** Package-private constructor for testing. */ + /** Package-private constructor for testing: injects only the two provider operations. + * Still calls APILocator for the remaining dependencies. */ ContentletIndexAPIImpl(final ContentletIndexOperations operationsES, final ContentletIndexOperations operationsOS) { this.operationsES = operationsES; @@ -192,6 +193,31 @@ public ContentletIndexAPIImpl() { // Use getMappingAPI() for lazy initialization at first use. } + /** + * Full constructor for unit testing — injects all dependencies without calling + * {@link com.dotmarketing.business.APILocator}, allowing fully isolated tests. + * + * @param operationsES ES write operations provider + * @param operationsOS OS write operations provider + * @param indexAPI phase-aware index management API (controls list/cluster operations) + * @param legacyIndiciesAPI ES index pointer store (working/live slots) + * @param versionedIndicesAPI OS index pointer store (working/live slots) + */ + ContentletIndexAPIImpl( + final ContentletIndexOperations operationsES, + final ContentletIndexOperations operationsOS, + final IndexAPI indexAPI, + final IndiciesAPI legacyIndiciesAPI, + final VersionedIndicesAPI versionedIndicesAPI) { + this.operationsES = operationsES; + this.operationsOS = operationsOS; + this.router = new PhaseRouter<>(operationsES, operationsOS); + this.queueApi = null; // not needed for the methods under test + this.indexAPI = indexAPI; + this.legacyIndiciesAPI = legacyIndiciesAPI; + this.versionedIndicesAPI = versionedIndicesAPI; + } + /** * Lazy initializer avoids circular reference Stackoverflow error. * Thread-safe: uses {@link AtomicReference#updateAndGet} to ensure diff --git a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java new file mode 100644 index 000000000000..77c82172f545 --- /dev/null +++ b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java @@ -0,0 +1,636 @@ +package com.dotcms.content.elasticsearch.business; + +import static com.dotcms.content.index.IndexConfigHelper.MigrationPhase.FLAG_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import com.dotcms.content.index.ContentletIndexOperations; +import com.dotcms.content.index.IndexAPI; +import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.index.VersionedIndicesAPI; +import com.dotcms.content.index.VersionedIndicesImpl; +import com.dotcms.content.index.domain.ClusterIndexHealth; +import com.dotcms.content.index.domain.ClusterStats; +import com.dotcms.content.index.domain.CreateIndexStatus; +import com.dotcms.content.index.domain.IndexBulkListener; +import com.dotcms.content.index.domain.IndexBulkProcessor; +import com.dotcms.content.index.domain.IndexBulkRequest; +import com.dotcms.content.index.domain.IndexStats; +import com.dotcms.contenttype.model.type.ContentType; +import com.dotmarketing.exception.DotDataException; +import com.dotmarketing.util.Config; +import java.io.IOException; +import java.sql.Connection; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.junit.After; +import org.junit.Test; + +/** + * Unit tests for {@link ContentletIndexAPIImpl} phase-aware behavior + * in the realistic "two different index names" scenario: + * + *
    + *
  • dotCMS started in Phase 0 and created ES index {@code working_T0} / {@code live_T0}
  • + *
  • Migration was started and OS index {@code working_T1} / {@code live_T1} was created
  • + *
  • Both indices co-exist; callers may supply either name
  • + *
+ * + *

These tests document the current, observable behavior of each API method + * across migration phases without requiring a running Elasticsearch or OpenSearch cluster. + * All infrastructure is replaced by in-memory fakes injected via the package-private + * testing constructor.

+ */ +public class ContentletIndexAPIImplPhaseTest { + + // ── Logical index names ─────────────────────────────────────────────────── + /** ES index timestamp suffix (created first, in Phase 0). */ + private static final String ES_WORKING = "working_T0"; + private static final String ES_LIVE = "live_T0"; + + /** OS index timestamp suffix (created during migration catchup). */ + private static final String OS_WORKING = "working_T1"; + private static final String OS_LIVE = "live_T1"; + + /** Cluster prefix prepended by both providers' {@code toPhysicalName()}. */ + private static final String CLUSTER_PREFIX = "cluster_test."; + + // ── Test teardown ───────────────────────────────────────────────────────── + + @After + public void clearPhase() { + Config.setProperty(FLAG_KEY, null); + } + + // ========================================================================= + // isDotCMSIndexName — purely syntactic (prefix check, no provider query) + // ========================================================================= + + /** + * Given Scenario: any migration phase. + * The method is purely syntactic: it checks whether the name starts with + * {@code "working_"} or {@code "live_"}. + * + * When : isDotCMSIndexName() is called with the ES logical name. + * Then : returns true — the ES name is a valid dotCMS index name. + */ + @Test + public void test_isDotCMSIndexName_esLogicalName_isTrue() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertTrue("ES working name must be recognised as a dotCMS index", + api.isDotCMSIndexName(ES_WORKING)); + assertTrue("ES live name must be recognised as a dotCMS index", + api.isDotCMSIndexName(ES_LIVE)); + } + + /** + * Given Scenario: any migration phase. + * When : isDotCMSIndexName() is called with the OS logical name. + * Then : returns true — even though only OS has this index, the name itself + * starts with "working_" so it passes the syntactic check. + * The method does NOT query which provider actually holds the index. + */ + @Test + public void test_isDotCMSIndexName_osLogicalName_isTrue() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertTrue("OS working name must be recognised as a dotCMS index (syntactic check only)", + api.isDotCMSIndexName(OS_WORKING)); + assertTrue("OS live name must be recognised as a dotCMS index", + api.isDotCMSIndexName(OS_LIVE)); + } + + /** + * Given Scenario: any phase. + * When : isDotCMSIndexName() is called with a physical name (cluster prefix included). + * Then : returns false — physical names do NOT start with "working_" or "live_". + * Callers must strip the cluster prefix before invoking this method. + */ + @Test + public void test_isDotCMSIndexName_physicalNameWithClusterPrefix_isFalse() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertFalse("Physical name with cluster prefix must NOT be recognised", + api.isDotCMSIndexName(CLUSTER_PREFIX + ES_WORKING)); + assertFalse("Physical OS name with cluster prefix must NOT be recognised", + api.isDotCMSIndexName(CLUSTER_PREFIX + OS_WORKING)); + } + + /** + * Given Scenario: any phase. + * When : isDotCMSIndexName() is called with a name that has no recognised dotCMS prefix. + * Then : returns false. + */ + @Test + public void test_isDotCMSIndexName_unknownPrefix_isFalse() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertFalse("Unrecognised prefix must return false", + api.isDotCMSIndexName("notadotcmsindex_T0")); + assertFalse("Null must return false", api.isDotCMSIndexName(null)); + } + + // ========================================================================= + // listDotCMSIndices — pure delegation to IndexAPI.getIndices(true, false) + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). + * The fake IndexAPI is pre-loaded with ES indices only. + * When : listDotCMSIndices() is called. + * Then : returns exactly the ES indices — OS is not consulted in Phase 0. + * The phase-awareness is encapsulated inside IndexAPIImpl (the real + * implementation); ContentletIndexAPIImpl simply delegates. + */ + @Test + public void test_listDotCMSIndices_phase0_esIndicesOnly() { + setPhase(0); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + final List indices = api.listDotCMSIndices(); + + assertEquals(2, indices.size()); + assertTrue(indices.contains(ES_WORKING)); + assertTrue(indices.contains(ES_LIVE)); + assertFalse("OS index must not appear in Phase 0", indices.contains(OS_WORKING)); + } + + /** + * Given Scenario: Phase 1 or 2 (dual-write). The fake IndexAPI returns a merged + * list of ES and OS indices, simulating IndexAPIImpl's merge behavior. + * When : listDotCMSIndices() is called. + * Then : returns indices from both providers. + * Each provider contributes its own timestamped names — T0 from ES, T1 from OS. + */ + @Test + public void test_listDotCMSIndices_dualWrite_returnsBothProviders() { + setPhase(1); + final List merged = List.of(ES_WORKING, ES_LIVE, OS_WORKING, OS_LIVE); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(merged), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + final List indices = api.listDotCMSIndices(); + + assertEquals(4, indices.size()); + assertTrue(indices.contains(ES_WORKING)); + assertTrue(indices.contains(OS_WORKING)); + } + + /** + * Given Scenario: Phase 3 (OS only). The fake IndexAPI returns OS indices only. + * When : listDotCMSIndices() is called. + * Then : returns only OS indices — ES is decommissioned. + */ + @Test + public void test_listDotCMSIndices_phase3_osIndicesOnly() { + setPhase(3); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(OS_WORKING, OS_LIVE)), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + final List indices = api.listDotCMSIndices(); + + assertEquals(2, indices.size()); + assertTrue(indices.contains(OS_WORKING)); + assertFalse("ES index must not appear in Phase 3", indices.contains(ES_WORKING)); + } + + // ========================================================================= + // activateIndex — writes to index pointer stores (IndiciesAPI / VersionedIndicesAPI) + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). ES store is empty. + * When : activateIndex("working_T0") is called (ES name). + * Then : ES store (legacyIndiciesAPI) is updated with the physical working name. + * OS store (versionedIndicesAPI) is NOT touched — migration has not started. + */ + @Test + public void test_activateIndex_phase0_esName_updatesEsStoreOnly() throws DotDataException { + setPhase(0); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + fakeIndicie, + fakeVersioned); + + api.activateIndex(ES_WORKING); + + assertEquals("ES store must record the physical working name", + CLUSTER_PREFIX + ES_WORKING, fakeIndicie.loadIndicies().getWorking()); + assertNull("OS store must not be touched in Phase 0", + fakeVersioned.stored); + } + + /** + * Given Scenario: Phase 0 (ES only). Caller passes the OS index name (working_T1), + * even though OS is not active in Phase 0. + * When : activateIndex("working_T1") is called. + * Then : ES store is updated with the physical T1 name — activateIndex does NOT + * validate that the index physically exists; it only writes to the pointer store. + * OS store is NOT touched. + * + *

Observation: in Phase 0 the ES store can be pointed at an index + * name that does not physically exist in the ES cluster, because activateIndex is + * a pure pointer-store update with no existence check.

+ */ + @Test + public void test_activateIndex_phase0_osName_goesToEsStore_noOsWrite() throws DotDataException { + setPhase(0); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + fakeIndicie, + fakeVersioned); + + api.activateIndex(OS_WORKING); // OS name passed in Phase 0 + + assertEquals("ES store must record the OS physical name even in Phase 0", + CLUSTER_PREFIX + OS_WORKING, fakeIndicie.loadIndicies().getWorking()); + assertNull("OS store must not be touched in Phase 0", fakeVersioned.stored); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). Caller uses the ES index name T0. + * When : activateIndex("working_T0") is called. + * Then : ES store is updated with T0 (physical). + * OS store mirror is also updated — but it receives the SAME logical name T0, + * even though OS physically has T1. The mirror writes the name as-is, without + * checking which index actually exists in the OS cluster. + * + *

Key observation: after this call, the OS pointer store records + * "working_T0" as the active working index — a mismatch with the physical OS index. + * This is expected behavior during the catch-up window.

+ */ + @Test + public void test_activateIndex_phase2_esName_mirroredToOsStore() throws DotDataException { + setPhase(2); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + fakeIndicie, + fakeVersioned); + + api.activateIndex(ES_WORKING); + + // ES pointer store is updated ✓ + assertEquals("ES store must record the physical working name", + CLUSTER_PREFIX + ES_WORKING, fakeIndicie.loadIndicies().getWorking()); + + // OS store receives a mirror of the same logical name + assertTrue("OS store must be populated in Phase 2", + fakeVersioned.stored != null); + assertEquals("OS store must contain the mirrored working name (even if OS has a different physical index)", + CLUSTER_PREFIX + ES_WORKING, + fakeVersioned.stored.working().orElse(null)); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). Caller uses the OS index name T1. + * When : activateIndex("working_T1") is called. + * Then : ES store is updated with T1 (even though ES physically has T0). + * OS store is also updated with T1 — in this case both stores are correct + * because the caller used the OS-native name. + */ + @Test + public void test_activateIndex_phase2_osName_updatesBothStores() throws DotDataException { + setPhase(2); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + fakeIndicie, + fakeVersioned); + + api.activateIndex(OS_WORKING); + + assertEquals("ES store must record the OS physical name", + CLUSTER_PREFIX + OS_WORKING, fakeIndicie.loadIndicies().getWorking()); + assertEquals("OS store must record its own physical name", + CLUSTER_PREFIX + OS_WORKING, + fakeVersioned.stored.working().orElse(null)); + } + + /** + * Given Scenario: Phase 3 (OS only). + * When : activateIndex("working_T1") is called. + * Then : only the OS store (versionedIndicesAPI) is updated. + * ES store (legacyIndiciesAPI) is NOT touched — ES is decommissioned. + */ + @Test + public void test_activateIndex_phase3_onlyOsStoreUpdated() throws DotDataException { + setPhase(3); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + fakeIndicie, + fakeVersioned); + + api.activateIndex(OS_WORKING); + + assertNull("ES store must NOT be touched in Phase 3 (ES is decommissioned)", + fakeIndicie.loadIndicies().getWorking()); + assertEquals("OS store must record the working name", + CLUSTER_PREFIX + OS_WORKING, + fakeVersioned.stored.working().orElse(null)); + } + + // ========================================================================= + // deactivateIndex — clears a slot from the pointer stores by index type + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). ES store has working=T0, live=T0-live. + * When : deactivateIndex("working_T0") is called. + * Then : ES store working slot is cleared (null); live slot is preserved. + * OS store is NOT touched. + */ + @Test + public void test_deactivateIndex_phase0_clearsEsWorkingSlotOnly() + throws DotDataException, IOException { + setPhase(0); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + fakeIndicie.setWorking(CLUSTER_PREFIX + ES_WORKING); + fakeIndicie.setLive(CLUSTER_PREFIX + ES_LIVE); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), fakeIndicie, fakeVersioned); + + api.deactivateIndex(ES_WORKING); + + assertNull("Working slot must be cleared after deactivation", + fakeIndicie.loadIndicies().getWorking()); + assertEquals("Live slot must be preserved", + CLUSTER_PREFIX + ES_LIVE, fakeIndicie.loadIndicies().getLive()); + assertNull("OS store must not be touched in Phase 0", fakeVersioned.stored); + } + + /** + * Given Scenario: Phase 2 (dual-write). Both ES and OS stores have their respective + * working indices recorded (T0 in ES, T1 in OS). + * When : deactivateIndex("working_T0") is called. + * Then : ES store working slot is cleared. + * OS store working slot is ALSO cleared — the deactivation clears the slot + * by INDEX TYPE (WORKING), not by the specific index name (T0 vs T1). + * The OS live slot is preserved. + * + *

Key observation: deactivateIndex identifies which slot to clear + * via the index-type prefix ("working_" / "live_"), not via the exact name. + * Passing "working_T0" in Phase 2 will clear the OS working slot even though OS + * physically records "working_T1" there.

+ */ + @Test + public void test_deactivateIndex_phase2_clearsBothStoresByIndexType() + throws DotDataException, IOException { + setPhase(2); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + fakeIndicie.setWorking(CLUSTER_PREFIX + ES_WORKING); + fakeIndicie.setLive(CLUSTER_PREFIX + ES_LIVE); + + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + // Pre-populate OS store: OS working = T1, OS live = T1-live + fakeVersioned.stored = VersionedIndicesImpl.builder() + .working(CLUSTER_PREFIX + OS_WORKING) + .live(CLUSTER_PREFIX + OS_LIVE) + .build(); + + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), fakeIndicie, fakeVersioned); + + // Deactivate using the ES index name — but the SLOT (WORKING) is cleared in both stores + api.deactivateIndex(ES_WORKING); + + assertNull("ES working slot must be cleared", + fakeIndicie.loadIndicies().getWorking()); + assertEquals("ES live slot must be preserved", + CLUSTER_PREFIX + ES_LIVE, fakeIndicie.loadIndicies().getLive()); + + // OS mirror: working slot cleared (by index type), live preserved + assertNull("OS working slot must also be cleared (deactivation is by index type, not name)", + fakeVersioned.stored.working().orElse(null)); + assertEquals("OS live slot must be preserved", + CLUSTER_PREFIX + OS_LIVE, fakeVersioned.stored.live().orElse(null)); + } + + // ========================================================================= + // Factory and helpers + // ========================================================================= + + private static ContentletIndexAPIImpl buildApi( + final FakeIndexAPI fakeIndex, + final FakeIndiciesAPI fakeIndicie, + final FakeVersionedIndicesAPI fakeVersioned) { + return new ContentletIndexAPIImpl( + new FakeContentletIndexOperations(), + new FakeContentletIndexOperations(), + fakeIndex, + fakeIndicie, + fakeVersioned); + } + + private static void setPhase(final int ordinal) { + Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); + } + + // ========================================================================= + // Fake implementations — in-memory stubs with no vendor dependencies + // ========================================================================= + + /** + * In-memory {@link IndexAPI} stub. + * Only the three methods used by the target methods are implemented; + * all others throw {@link UnsupportedOperationException}. + */ + static class FakeIndexAPI implements IndexAPI { + + private final List openIndices; + + FakeIndexAPI(final List openIndices) { + this.openIndices = new ArrayList<>(openIndices); + } + + @Override + public List getIndices(final boolean expandOpen, final boolean expandClosed) { + return Collections.unmodifiableList(openIndices); + } + + @Override + public List getClosedIndexes() { + return List.of(); + } + + @Override + public String getNameWithClusterIDPrefix(final String name) { + return name.startsWith(CLUSTER_PREFIX) ? name : CLUSTER_PREFIX + name; + } + + // ── unneeded methods ───────────────────────────────────────────────── + + @Override public Map getIndicesStats() { throw new UnsupportedOperationException(); } + @Override public Map flushCaches(List n) { throw new UnsupportedOperationException(); } + @Override public boolean optimize(List n) { throw new UnsupportedOperationException(); } + @Override public boolean delete(String n) { throw new UnsupportedOperationException(); } + @Override public boolean deleteMultiple(String... n) { throw new UnsupportedOperationException(); } + @Override public void deleteInactiveLiveWorkingIndices(int n) { throw new UnsupportedOperationException(); } + @Override public Set listIndices() { throw new UnsupportedOperationException(); } + @Override public boolean isIndexClosed(String n) { throw new UnsupportedOperationException(); } + @Override public boolean indexExists(String n) { throw new UnsupportedOperationException(); } + @Override public void createIndex(String n) { throw new UnsupportedOperationException(); } + @Override public CreateIndexStatus createIndex(String n, int s) { throw new UnsupportedOperationException(); } + @Override public void clearIndex(String n) { throw new UnsupportedOperationException(); } + @Override public CreateIndexStatus createIndex(String n, String s, int sh) { throw new UnsupportedOperationException(); } + @Override public String getDefaultIndexSettings() { throw new UnsupportedOperationException(); } + @Override public Map getClusterHealth() { throw new UnsupportedOperationException(); } + @Override public void updateReplicas(String n, int r) { throw new UnsupportedOperationException(); } + @Override public void createAlias(String n, String a) { throw new UnsupportedOperationException(); } + @Override public Map getIndexAlias(List n) { throw new UnsupportedOperationException(); } + @Override public Map getIndexAlias(String[] n) { throw new UnsupportedOperationException(); } + @Override public String getIndexAlias(String n) { throw new UnsupportedOperationException(); } + @Override public Map getAliasToIndexMap(List n) { throw new UnsupportedOperationException(); } + @Override public void closeIndex(String n) { throw new UnsupportedOperationException(); } + @Override public void openIndex(String n) { throw new UnsupportedOperationException(); } + @Override public List getLiveWorkingIndicesSortedByCreationDateDesc() { throw new UnsupportedOperationException(); } + @Override public Status getIndexStatus(String n) { throw new UnsupportedOperationException(); } + @Override public boolean waitUtilIndexReady() { throw new UnsupportedOperationException(); } + @Override public ClusterStats getClusterStats() { throw new UnsupportedOperationException(); } + } + + /** + * In-memory {@link IndiciesAPI} stub that stores the current index pointers as a + * mutable {@link IndiciesInfo}. + */ + @SuppressWarnings("deprecation") + static class FakeIndiciesAPI implements IndiciesAPI { + + private IndiciesInfo current = new IndiciesInfo.Builder().build(); + + void setWorking(final String working) { + current = new IndiciesInfo.Builder() + .setWorking(working) + .setLive(current.getLive()) + .build(); + } + + void setLive(final String live) { + current = new IndiciesInfo.Builder() + .setWorking(current.getWorking()) + .setLive(live) + .build(); + } + + @Override + public IndiciesInfo loadIndicies() { + return current; + } + + @Override + public IndiciesInfo loadIndicies(final Connection conn) { + return current; + } + + @Override + public void point(final IndiciesInfo newInfo) { + this.current = newInfo; + } + } + + /** + * In-memory {@link VersionedIndicesAPI} stub that stores a single + * {@link VersionedIndices} record (the "default" OS record). + */ + static class FakeVersionedIndicesAPI implements VersionedIndicesAPI { + + VersionedIndices stored = null; + + @Override + public Optional loadDefaultVersionedIndices() { + return Optional.ofNullable(stored); + } + + @Override + public void saveIndices(final VersionedIndices info) { + this.stored = info; + } + + @Override + public Optional loadNonVersionedIndices() { + return Optional.empty(); + } + + // ── unneeded methods ───────────────────────────────────────────────── + + @Override public Optional loadIndices(String v) { throw new UnsupportedOperationException(); } + @Override public List loadAllIndices() { throw new UnsupportedOperationException(); } + @Override public void removeVersion(String v) { throw new UnsupportedOperationException(); } + @Override public boolean versionExists(String v) { throw new UnsupportedOperationException(); } + @Override public int getIndicesCount(String v) { throw new UnsupportedOperationException(); } + @Override public Instant extractTimestamp(String n) { throw new UnsupportedOperationException(); } + @Override public void clearCache() { /* no-op */ } + } + + /** + * Minimal {@link ContentletIndexOperations} stub used only as a constructor argument. + * + *

Only {@link #toPhysicalName} is implemented (the default interface method is + * overridden to avoid calling a real {@link IndexAPI}). All bulk and lifecycle + * operations throw {@link UnsupportedOperationException} since none of the + * tested methods invoke them.

+ */ + static class FakeContentletIndexOperations implements ContentletIndexOperations { + + @Override + public String toPhysicalName(final String indexName) { + return indexName.startsWith(CLUSTER_PREFIX) ? indexName : CLUSTER_PREFIX + indexName; + } + + @Override + public IndexAPI indexAPI() { + throw new UnsupportedOperationException("indexAPI() not used by tests"); + } + + // ── unneeded methods ───────────────────────────────────────────────── + + @Override public IndexBulkRequest createBulkRequest() { throw new UnsupportedOperationException(); } + @Override public void addIndexOp(IndexBulkRequest r, String i, String d, String j) { throw new UnsupportedOperationException(); } + @Override public void addDeleteOp(IndexBulkRequest r, String i, String d) { throw new UnsupportedOperationException(); } + @Override public void setRefreshPolicy(IndexBulkRequest r, IndexBulkRequest.RefreshPolicy p) { throw new UnsupportedOperationException(); } + @Override public void putToIndex(IndexBulkRequest r) { throw new UnsupportedOperationException(); } + @Override public IndexBulkProcessor createBulkProcessor(IndexBulkListener l) { throw new UnsupportedOperationException(); } + @Override public void addIndexOpToProcessor(IndexBulkProcessor p, String i, String d, String j) { throw new UnsupportedOperationException(); } + @Override public void addDeleteOpToProcessor(IndexBulkProcessor p, String i, String d) { throw new UnsupportedOperationException(); } + @Override public boolean createContentIndex(String n, int s) { throw new UnsupportedOperationException(); } + @Override public void removeContentFromIndexByContentType(ContentType t) { throw new UnsupportedOperationException(); } + @Override public long getIndexDocumentCount(String n) { throw new UnsupportedOperationException(); } + } +} diff --git a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java index fa81478f3d4b..0a8b93f95fe0 100644 --- a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java +++ b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java @@ -1,5 +1,6 @@ package com.dotcms; +import com.dotcms.content.elasticsearch.business.ContentletIndexAPIImplMigrationIT; import com.dotcms.content.index.opensearch.ContentFactoryIndexOperationsOSIntegrationTest; import com.dotcms.content.index.opensearch.ContentletIndexOperationsOSIntegrationTest; import com.dotcms.content.index.opensearch.OSCreateContentIndexIntegrationTest; @@ -38,7 +39,8 @@ OSCreateContentIndexIntegrationTest.class, ContentFactoryIndexOperationsOSIntegrationTest.class, OSClientProviderIntegrationTest.class, - OSClientConfigTest.class + OSClientConfigTest.class, + ContentletIndexAPIImplMigrationIT.class }) public class OpenSearchUpgradeSuite { } \ No newline at end of file diff --git a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java new file mode 100644 index 000000000000..7a5414147d8b --- /dev/null +++ b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java @@ -0,0 +1,676 @@ +package com.dotcms.content.elasticsearch.business; + +import static com.dotcms.content.index.IndexConfigHelper.MigrationPhase.FLAG_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeFalse; + +import com.dotcms.DataProviderWeldRunner; +import com.dotcms.IntegrationTestBase; +import com.dotcms.content.index.IndexAPIImpl; +import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.index.opensearch.OSIndexAPIImpl; +import com.dotcms.util.IntegrationTestInitService; +import com.dotmarketing.business.APILocator; +import com.dotmarketing.exception.DotDataException; +import com.dotmarketing.util.Config; +import com.dotmarketing.util.Logger; +import io.vavr.control.Try; +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import javax.enterprise.context.ApplicationScoped; +import javax.inject.Inject; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +/** + * Integration tests for {@link ContentletIndexAPIImpl} that exercise the phase-aware routing + * behavior against live search clusters. + * + *

Scenario

+ *

The tests simulate the real catch-up situation that arises during the ES→OS migration: + * Elasticsearch (or an ES-compatible cluster) was the original search backend and holds index + * {@code working_T0} / {@code live_T0}. OpenSearch was brought up later and received index + * {@code working_T1} / {@code live_T1}. Both clusters are live simultaneously; the migration + * phase flag controls which provider is treated as primary.

+ * + *

Two-cluster vs single-cluster profiles

+ *

Some tests require separate ES and OS clusters to observe cluster-isolation + * behavior (e.g. "Phase 0 shows only ES indices"). When both clients point to the same cluster + * — as happens in the {@code opensearch-upgrade} Maven profile where + * {@code DOT_ES_ENDPOINTS == OS_ENDPOINTS} — those tests are automatically skipped via + * {@link org.junit.Assume}. Tests that only verify routing logic or DB pointer state work + * correctly in both single- and two-cluster setups.

+ * + *

Run command

+ *
+ *   ./mvnw verify -pl :dotcms-integration \
+ *       -Dcoreit.test.skip=false \
+ *       -Dopensearch.upgrade.test=true \
+ *       -Dit.test=ContentletIndexAPIImplMigrationIT
+ * 
+ * + * @author Fabrizzio Araya + */ +@ApplicationScoped +@RunWith(DataProviderWeldRunner.class) +public class ContentletIndexAPIImplMigrationIT extends IntegrationTestBase { + + // ── Unique suffix prevents cross-run index name collisions ──────────────── + private static final String RUN_ID = + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + + /** + * ES index name — represents the index created during Phase 0 (before OS existed). + */ + private static final String ES_WORKING = "working_t0_" + RUN_ID; + private static final String ES_LIVE = "live_t0_" + RUN_ID; + + /** + * OS index name — represents the index created during migration catch-up. + * Different timestamp suffix → different from the ES name. + */ + private static final String OS_WORKING = "working_t1_" + RUN_ID; + private static final String OS_LIVE = "live_t1_" + RUN_ID; + + /** + * Name used for the dual-write fan-out test — a single logical name that + * {@code createContentIndex()} sends to both providers simultaneously. + */ + private static final String DUAL_WORKING = "working_dual_" + RUN_ID; + + /** + * A name that both ES and OS reject at the cluster level: spaces are not allowed in index names. + * Used to verify that cluster-level validation errors always propagate — they are NOT + * swallowed by the fire-and-forget mechanism (which only applies to shadow {@code index_not_found}). + */ + private static final String INVALID_INDEX_NAME = "invalid name with spaces " + RUN_ID; + + /** + * A name that matches the {@code working} prefix (so {@link IndexType#WORKING} recognises it) + * but was never created in any cluster. + * Used to verify that {@code deactivateIndex} is a DB-pointer-only operation: it clears + * the slot based on the name pattern, without checking cluster existence. + */ + private static final String GHOST_WORKING = "working_ghost_" + RUN_ID; + + // ── CDI-injected direct OS handle (bypasses the phase router) ──────────── + @Inject + private OSIndexAPIImpl osIndexAPI; + + // ── Saved DB state — restored in @After to avoid polluting the running app ── + @SuppressWarnings("deprecation") + private IndiciesInfo savedEsInfo; + private Optional savedOsIndices; + + // ========================================================================= + // Lifecycle + // ========================================================================= + + @BeforeClass + public static void prepare() throws Exception { + IntegrationTestInitService.getInstance().init(); + } + + @Before + public void setUp() throws Exception { + savedEsInfo = APILocator.getIndiciesAPI().loadIndicies(); + savedOsIndices = APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + + cleanupTestIndices(); + + // Create ES indices directly against the ES cluster (bypasses phase routing) + esImpl().createIndex(ES_WORKING, 1); + esImpl().createIndex(ES_LIVE, 1); + + // Create OS indices directly against the OS cluster (bypasses phase routing) + osIndexAPI.createIndex(OS_WORKING, 1); + osIndexAPI.createIndex(OS_LIVE, 1); + } + + @After + public void tearDown() throws Exception { + Config.setProperty(FLAG_KEY, null); + cleanupTestIndices(); + + Try.run(() -> APILocator.getIndiciesAPI().point(savedEsInfo)); + savedOsIndices.ifPresent(v -> + Try.run(() -> APILocator.getVersionedIndicesAPI().saveIndices(v))); + } + + // ========================================================================= + // listDotCMSIndices — real cluster visibility by phase + // + // NOTE: Phase 0 and Phase 3 isolation tests require two *separate* clusters + // (ES on one endpoint, OS on another). They are skipped automatically when + // the profile routes both clients to the same cluster. + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). ES has T0; OS has T1. + * When : listDotCMSIndices() is called. + * Then : Only the ES indices are returned — OS cluster not queried. + * Skip : when ES and OS endpoints are the same cluster (single-cluster profile). + */ + @Test + public void test_listDotCMSIndices_phase0_returnsEsIndicesOnly() { + assumeFalse("Requires separate ES and OS clusters (ES and OS point to same endpoint)", + esSameAsOs()); + setPhase(0); + + final List indices = contentletIndexAPI().listDotCMSIndices(); + + assertTrue("ES working must appear in Phase 0", indices.contains(ES_WORKING)); + assertTrue("ES live must appear in Phase 0", indices.contains(ES_LIVE)); + assertFalse("OS working must NOT appear in Phase 0 (OS not queried)", + indices.contains(OS_WORKING)); + + Logger.info(this, "✅ listDotCMSIndices Phase 0 — ES only: " + indices); + } + + /** + * Given Scenario: Phase 1 (dual-write). Both clusters are live. + * When : listDotCMSIndices() is called. + * Then : Both ES (T0) and OS (T1) indices appear — different names from different clusters. + * Works in single-cluster profile too: both T0 and T1 exist in the shared cluster. + */ + @Test + public void test_listDotCMSIndices_phase1_returnsBothIndexNames() { + setPhase(1); + + final List indices = contentletIndexAPI().listDotCMSIndices(); + + assertTrue("ES working (T0) must appear in Phase 1", indices.contains(ES_WORKING)); + assertTrue("ES live (T0) must appear in Phase 1", indices.contains(ES_LIVE)); + assertTrue("OS working (T1) must appear in Phase 1", indices.contains(OS_WORKING)); + assertTrue("OS live (T1) must appear in Phase 1", indices.contains(OS_LIVE)); + + Logger.info(this, "✅ listDotCMSIndices Phase 1 — both index names: " + indices); + } + + /** + * Given Scenario: Phase 3 (OS only). OS has T1. + * When : listDotCMSIndices() is called. + * Then : Only OS indices appear — ES decommissioned. + * Skip : when ES and OS endpoints are the same cluster. + */ + @Test + public void test_listDotCMSIndices_phase3_returnsOsIndicesOnly() { + assumeFalse("Requires separate ES and OS clusters (ES and OS point to same endpoint)", + esSameAsOs()); + setPhase(3); + + final List indices = contentletIndexAPI().listDotCMSIndices(); + + assertTrue("OS working must appear in Phase 3", indices.contains(OS_WORKING)); + assertFalse("ES working must NOT appear in Phase 3 (ES decommissioned)", + indices.contains(ES_WORKING)); + + Logger.info(this, "✅ listDotCMSIndices Phase 3 — OS only: " + indices); + } + + // ========================================================================= + // createContentIndex — real dual-write fan-out + // ========================================================================= + + /** + * Given Scenario: Phase 0. A new content index is requested. + * When : createContentIndex() is called. + * Then : Index created ONLY in ES. OS cluster unchanged. + * Skip : when ES and OS point to the same cluster. + */ + @Test + public void test_createContentIndex_phase0_createsOnlyInEs() throws IOException, DotIndexException { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(0); + + assertFalse("Pre: not in ES yet", esImpl().indexExists(DUAL_WORKING)); + assertFalse("Pre: not in OS yet", osIndexAPI.indexExists(DUAL_WORKING)); + + contentletIndexAPI().createContentIndex(DUAL_WORKING, 1); + + assertTrue("Must exist in ES after Phase 0 createContentIndex", + esImpl().indexExists(DUAL_WORKING)); + assertFalse("Must NOT exist in OS in Phase 0", + osIndexAPI.indexExists(DUAL_WORKING)); + + Logger.info(this, "✅ createContentIndex Phase 0 — ES only: " + DUAL_WORKING); + } + + /** + * Given Scenario: Phase 1 (dual-write). A new content index is requested. + * When : createContentIndex() is called. + * Then : Index created in BOTH clusters simultaneously — the core dual-write guarantee. + * Works in single-cluster profile too (same cluster receives both writes). + */ + @Test + public void test_createContentIndex_phase1_createsInBothClusters() throws IOException, DotIndexException { + setPhase(1); + + assertFalse("Pre: not in ES yet", esImpl().indexExists(DUAL_WORKING)); + assertFalse("Pre: not in OS yet", osIndexAPI.indexExists(DUAL_WORKING)); + + contentletIndexAPI().createContentIndex(DUAL_WORKING, 1); + + assertTrue("Must exist in ES after Phase 1 (fan-out)", + esImpl().indexExists(DUAL_WORKING)); + assertTrue("Must exist in OS after Phase 1 (fan-out)", + osIndexAPI.indexExists(DUAL_WORKING)); + + Logger.info(this, "✅ createContentIndex Phase 1 — both: " + DUAL_WORKING); + } + + /** + * Given Scenario: Phase 3. A new content index is requested. + * When : createContentIndex() is called. + * Then : Index created ONLY in OS. + * Skip : when ES and OS point to the same cluster. + */ + @Test + public void test_createContentIndex_phase3_createsOnlyInOs() throws IOException, DotIndexException { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(3); + + assertFalse("Pre: not in ES yet", esImpl().indexExists(DUAL_WORKING)); + assertFalse("Pre: not in OS yet", osIndexAPI.indexExists(DUAL_WORKING)); + + contentletIndexAPI().createContentIndex(DUAL_WORKING, 1); + + assertFalse("Must NOT exist in ES in Phase 3 (ES decommissioned)", + esImpl().indexExists(DUAL_WORKING)); + assertTrue("Must exist in OS after Phase 3", + osIndexAPI.indexExists(DUAL_WORKING)); + + Logger.info(this, "✅ createContentIndex Phase 3 — OS only: " + DUAL_WORKING); + } + + // ========================================================================= + // activateIndex — pointer-store writes verified against real DB + // ========================================================================= + + /** + * Given Scenario: Phase 0. ES store is currently pointing at the pre-existing app index. + * When : activateIndex(ES_WORKING) is called. + * Then : ES DB working slot is updated to T0. + * OS DB is unchanged compared to what it was before this call + * (Phase 0 → isMigrationStarted() = false → OS mirror block never executes). + */ + @Test + public void test_activateIndex_phase0_updatesEsDbOnly() throws DotDataException { + setPhase(0); + // Capture OS state immediately before the call — for before/after comparison + final Optional osBefore = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + + contentletIndexAPI().activateIndex(ES_WORKING); + + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertTrue("ES DB working slot must hold the T0 physical name", + esInfo.getWorking() != null && esInfo.getWorking().endsWith(ES_WORKING)); + + // OS DB must be exactly as it was before — activateIndex in Phase 0 must not touch it + final Optional osAfter = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertEquals("OS DB working slot must be unchanged by Phase 0 activateIndex", + osBefore.flatMap(VersionedIndices::working).orElse(null), + osAfter.flatMap(VersionedIndices::working).orElse(null)); + + Logger.info(this, "✅ activateIndex Phase 0 — ES DB: " + esInfo.getWorking()); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). Caller passes the ES index name T0. + * When : activateIndex(ES_WORKING) is called. + * Then : ES DB updated with T0. + * OS DB mirror ALSO updated with T0 — even though OS physically holds T1. + * This documents the known mismatch: the OS DB pointer reflects the name + * passed in, regardless of which index the OS cluster actually holds. + */ + @Test + public void test_activateIndex_phase2_esName_mirroredToOsDb() throws DotDataException { + setPhase(2); + + contentletIndexAPI().activateIndex(ES_WORKING); + + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertTrue("ES DB must hold T0 physical name", + esInfo.getWorking() != null && esInfo.getWorking().endsWith(ES_WORKING)); + + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB must be populated with the mirrored name", + osInfo.isPresent() + && osInfo.get().working().map(w -> w.endsWith(ES_WORKING)).orElse(false)); + + Logger.info(this, "✅ activateIndex Phase 2 (ES name) — ES DB: " + esInfo.getWorking() + + ", OS DB: " + osInfo.map(v -> v.working().orElse("(empty)")).orElse("(absent)")); + } + + /** + * Given Scenario: Phase 2. Caller passes the OS index name T1. + * When : activateIndex(OS_WORKING) is called. + * Then : Both pointer stores consistently point to T1. + */ + @Test + public void test_activateIndex_phase2_osName_updatesBothDbsConsistently() throws DotDataException { + setPhase(2); + + contentletIndexAPI().activateIndex(OS_WORKING); + + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertTrue("ES DB must point to T1", + esInfo.getWorking() != null && esInfo.getWorking().endsWith(OS_WORKING)); + + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB must also point to T1", + osInfo.isPresent() + && osInfo.get().working().map(w -> w.endsWith(OS_WORKING)).orElse(false)); + + Logger.info(this, "✅ activateIndex Phase 2 (OS name) — both DBs point to T1"); + } + + /** + * Given Scenario: Phase 3. + * When : activateIndex(OS_WORKING) is called. + * Then : Only OS DB updated. Legacy ES DB unchanged. + */ + @Test + public void test_activateIndex_phase3_onlyOsDbUpdated() throws DotDataException { + setPhase(3); + final String esWorkingBefore = APILocator.getIndiciesAPI().loadIndicies().getWorking(); + + contentletIndexAPI().activateIndex(OS_WORKING); + + assertEquals("ES DB must NOT be touched in Phase 3", + esWorkingBefore, APILocator.getIndiciesAPI().loadIndicies().getWorking()); + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB must hold T1", + osInfo.isPresent() + && osInfo.get().working().map(w -> w.endsWith(OS_WORKING)).orElse(false)); + + Logger.info(this, "✅ activateIndex Phase 3 — only OS DB updated"); + } + + // ========================================================================= + // fire-and-forget — real index_not_found handling at cluster level + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). ES has T0; OS has T1. + * When : closeIndex("working_T0") is called — T0 exists in ES but NOT in OS. + * Then : ES close succeeds; OS throws {@code index_not_found} for T0 (shadow in Phase 1). + * Exception is swallowed — the call returns normally. + * This is the exact bug scenario from #35302. + * + *

Skip: when ES and OS point to the same cluster — in that case T0 exists in + * both client views so neither throws {@code index_not_found} and the + * fire-and-forget path is not exercised.

+ */ + @Test + public void test_closeIndex_phase1_osIndexNotFound_isSwallowed() { + assumeFalse("Requires separate ES and OS clusters to get real index_not_found from OS", + esSameAsOs()); + setPhase(1); + + try { + APILocator.getESIndexAPI().closeIndex(ES_WORKING); + Logger.info(this, "✅ closeIndex Phase 1 — OS index_not_found swallowed successfully"); + } catch (final RuntimeException e) { + throw new AssertionError( + "closeIndex must NOT throw in Phase 1 when OS has a different index name. Got: " + + e.getMessage(), e); + } + } + + /** + * Given Scenario: Phase 1. ES has T0; OS has T1. Caller uses the OS name T1. + * When : closeIndex("working_T1") is called — T1 exists in OS but NOT in ES. + * Then : ES is primary in Phase 1 → ES throws {@code index_not_found} for T1. + * Primary failure PROPAGATES — callers must not use the OS-native name in Phase 1. + * + *

Skip: when ES and OS point to the same cluster — T1 exists in the ES view too.

+ */ + @Test(expected = RuntimeException.class) + public void test_closeIndex_phase1_esIndexNotFound_propagates() { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(1); + + // OS_WORKING exists in OS but NOT in ES — ES is primary → must throw + APILocator.getESIndexAPI().closeIndex(OS_WORKING); + } + + /** + * Given Scenario: Phase 3 (OS only). OS has T1; T0 does not exist in OS. + * When : closeIndex("working_T0") is called. + * Then : OS is the only provider and is primary → exception propagates. + * + *

Skip: when ES and OS point to the same cluster — T0 also exists in the OS view.

+ */ + @Test(expected = RuntimeException.class) + public void test_closeIndex_phase3_osIndexNotFound_propagates() { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(3); + + APILocator.getESIndexAPI().closeIndex(ES_WORKING); + } + + // ========================================================================= + // deactivateIndex — ghost index (name never created in any cluster) + // + // deactivateIndex is a pure pointer-store operation: it does NOT query the cluster + // to verify the index exists. It uses IndexType.WORKING.is(name) (a startsWith check) + // to decide which DB slot to clear, then writes the result back to the store. + // + // Calling it with a name that was never created must: + // - not throw (cluster absence is irrelevant) + // - clear the working slot in the primary pointer store + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). {@code GHOST_WORKING} starts with + * "working" but was never created in either cluster. + * When : deactivateIndex(GHOST_WORKING) is called. + * Then : No exception — {@code deactivateIndex} never validates cluster existence. + * The ES DB working slot is cleared (name matched {@link IndexType#WORKING}). + * The OS DB working slot is also cleared (mirrored in Phase 1). + * + *

This documents the fundamental contract: {@code deactivateIndex} is a + * pointer-store update driven by the name pattern, not by cluster state.

+ */ + @Test + @SuppressWarnings("deprecation") + public void test_deactivateIndex_ghostIndex_phase1_clearsWorkingSlot() + throws DotDataException, IOException { + setPhase(1); + + // Verify the ghost index truly doesn't exist in either cluster + assertFalse("Pre: ghost must not exist in ES", esImpl().indexExists(GHOST_WORKING)); + assertFalse("Pre: ghost must not exist in OS", osIndexAPI.indexExists(GHOST_WORKING)); + + // Give BOTH working AND live slots a known value. + // The live slot must survive the deactivate call so that the VersionedIndices + // builder always has at least one field set (saveIndices rejects empty builders). + contentletIndexAPI().activateIndex(ES_WORKING); + contentletIndexAPI().activateIndex(ES_LIVE); + assertNotNull("Pre: ES DB working slot must be non-null before deactivate", + APILocator.getIndiciesAPI().loadIndicies().getWorking()); + + // deactivateIndex with a ghost name must NOT throw + contentletIndexAPI().deactivateIndex(GHOST_WORKING); + + // ES DB working slot must be null — GHOST_WORKING starts with "working", + // so IndexType.WORKING.is(GHOST_WORKING) == true → builder.setWorking(null) + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertNull("ES DB working slot must be cleared after deactivating a ghost index", + esInfo.getWorking()); + + // OS DB working slot must also be cleared (mirrored in Phase 1). + // The live slot in OS must be preserved (builder kept it because IndexType.LIVE + // did not match GHOST_WORKING, so the save is valid). + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB working slot must be cleared (Phase 1 mirrors ES deactivation)", + osInfo.map(v -> v.working().isEmpty()).orElse(true)); + assertTrue("OS DB live slot must be preserved after deactivating the working slot", + osInfo.flatMap(VersionedIndices::live).isPresent()); + + Logger.info(this, + "✅ deactivateIndex Phase 1 cleared both DB working slots for ghost index: " + + GHOST_WORKING); + } + + /** + * Given Scenario: Phase 3 (OS only). {@code GHOST_WORKING} was never created in OS. + * When : deactivateIndex(GHOST_WORKING) is called. + * Then : No exception — OS is the primary store but cluster existence is not validated. + * The OS DB working slot is cleared; the legacy ES DB is not touched. + */ + @Test + @SuppressWarnings("deprecation") + public void test_deactivateIndex_ghostIndex_phase3_clearsOsWorkingSlot() + throws DotDataException, IOException { + setPhase(3); + + assertFalse("Pre: ghost must not exist in OS", osIndexAPI.indexExists(GHOST_WORKING)); + + // Give BOTH working AND live slots a known value. + // The live slot must survive the deactivate call so that VersionedIndicesAPI + // never sees an empty builder (saveIndices rejects empty builders). + contentletIndexAPI().activateIndex(OS_WORKING); + contentletIndexAPI().activateIndex(OS_LIVE); + assertTrue("Pre: OS DB working slot must be set", + APILocator.getVersionedIndicesAPI() + .loadDefaultVersionedIndices() + .flatMap(VersionedIndices::working) + .isPresent()); + + // Capture ES DB state — Phase 3 must not touch it + final String esWorkingBefore = + APILocator.getIndiciesAPI().loadIndicies().getWorking(); + + // Must not throw + contentletIndexAPI().deactivateIndex(GHOST_WORKING); + + // OS DB working slot cleared; live slot preserved (builder had live → save valid) + final Optional osAfter = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB working slot must be cleared after deactivating a ghost index", + osAfter.map(v -> v.working().isEmpty()).orElse(true)); + assertTrue("OS DB live slot must be preserved after deactivating the working slot", + osAfter.flatMap(VersionedIndices::live).isPresent()); + + // Legacy ES DB untouched in Phase 3 + final String esWorkingAfter = + APILocator.getIndiciesAPI().loadIndicies().getWorking(); + assertEquals("ES DB must NOT be touched by deactivateIndex in Phase 3", + esWorkingBefore, esWorkingAfter); + + Logger.info(this, + "✅ deactivateIndex Phase 3 cleared OS DB working slot without touching ES DB"); + } + + // ========================================================================= + // Invalid index names — cluster rejection must always propagate + // + // The fire-and-forget mechanism silences shadow index_not_found errors only. + // A cluster-level rejection of a syntactically invalid name is a primary failure + // and must propagate regardless of phase. + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). Index name contains spaces. + * When : createContentIndex(invalidName) is called. + * Then : Both providers reject the name at the cluster level. + * {@link ContentletIndexAPI#createContentIndex(String,int)} + * absorbs provider-level exceptions and converts them to a {@code false} return value — + * this is the documented soft-failure contract for that method. + * The return value {@code false} signals to the caller that no cluster acknowledged the request. + */ + @Test + public void test_createContentIndex_invalidName_phase1_returnsFalse() + throws IOException, DotIndexException { + setPhase(1); + + final boolean result = contentletIndexAPI().createContentIndex(INVALID_INDEX_NAME, 1); + + assertFalse( + "createContentIndex must return false when clusters reject an invalid index name (Phase 1)", + result); + Logger.info(this, "✅ createContentIndex Phase 1 returned false for invalid name (as expected)"); + } + + /** + * Given Scenario: Phase 3 (OS only). Index name contains spaces. + * When : createContentIndex(invalidName) is called. + * Then : OS is the sole provider and rejects the name. The soft-failure return value + * {@code false} surfaces — the same contract as Phase 1, with a single-provider path. + */ + @Test + public void test_createContentIndex_invalidName_phase3_returnsFalse() + throws IOException, DotIndexException { + setPhase(3); + + final boolean result = contentletIndexAPI().createContentIndex(INVALID_INDEX_NAME, 1); + + assertFalse( + "createContentIndex must return false when the cluster rejects an invalid index name (Phase 3)", + result); + Logger.info(this, "✅ createContentIndex Phase 3 returned false for invalid name (as expected)"); + } + + // ========================================================================= + // Helpers + // ========================================================================= + + /** + * Returns {@code true} when the ES client and OS client are configured to talk to + * the same cluster endpoint. This happens in the {@code opensearch-upgrade} Maven profile, + * where {@code DOT_ES_ENDPOINTS} is overridden to equal {@code OS_ENDPOINTS}. + * + *

Tests that require two physically separate clusters use this to skip themselves + * via {@link org.junit.Assume#assumeFalse}.

+ */ + private static boolean esSameAsOs() { + final String esEndpoint = Config.getStringProperty("DOT_ES_ENDPOINTS", + "http://localhost:9207"); + final String osEndpoint = Config.getStringProperty("OS_ENDPOINTS", + "http://localhost:9201"); + return esEndpoint.equalsIgnoreCase(osEndpoint.trim()); + } + + private static void setPhase(final int ordinal) { + Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); + } + + private static ContentletIndexAPI contentletIndexAPI() { + return APILocator.getContentletIndexAPI(); + } + + private static ESIndexAPI esImpl() { + return ((IndexAPIImpl) APILocator.getESIndexAPI()).esImpl(); + } + + private void cleanupTestIndices() { + final ESIndexAPI esIndex = esImpl(); + for (final String name : List.of(ES_WORKING, ES_LIVE, DUAL_WORKING)) { + Try.run(() -> { if (esIndex.indexExists(name)) esIndex.delete(name); }) + .onFailure(e -> Logger.warn(this, + "Cleanup: error removing ES index '" + name + "': " + e.getMessage())); + } + for (final String name : List.of(OS_WORKING, OS_LIVE, DUAL_WORKING)) { + Try.run(() -> { if (osIndexAPI.indexExists(name)) osIndexAPI.delete(name); }) + .onFailure(e -> Logger.warn(this, + "Cleanup: error removing OS index '" + name + "': " + e.getMessage())); + } + } +} From a43b494dbb53367d13fd3ef93f43e70a13562fb2 Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 20 Apr 2026 13:30:46 -0600 Subject: [PATCH 3/5] fix(opensearch): guard addCustomMapping behind result check in createContentIndex addCustomMapping was called unconditionally after a failed index creation, which could apply mappings to a non-existent index and surface an unrelated exception instead of the clean false-return the soft-failure contract promises. Co-Authored-By: Claude Sonnet 4.6 --- .../elasticsearch/business/ContentletIndexAPIImpl.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 74a335d4bead..baadc0a67c32 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -594,7 +594,9 @@ public synchronized boolean createContentIndex(final String indexName, final int result = false; } } - MappingHelper.getInstance().addCustomMapping(indexName); + if (result) { + MappingHelper.getInstance().addCustomMapping(indexName); + } return result; } From 3e109748c3ffe52e300d7ba95907fbb5eabc56b9 Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 20 Apr 2026 20:17:03 -0600 Subject: [PATCH 4/5] fix(opensearch): address PR #35389 review feedback - Logger: add info(Class, String, Throwable) overload so INFO-level log calls can forward the throwable just like DEBUG/WARN/ERROR do - IndexConfigHelper: pass throwable `t` in the INFO branch of logShadowWriteFailure so operators don't silently lose stack traces - ContentletIndexAPIImpl: guard addCustomMapping behind if(contentIndex) in the private createContentIndex(String,int,IndexTag) overload to match the public overload's behaviour; annotate queueApi with @Nullable to document the testing-constructor invariant - ContentletIndexAPIImplMigrationIT: trim both operands in esSameAsOs() to avoid false negatives from trailing whitespace; add .onFailure() warnings to tearDown Try.run calls so DB-restore failures are visible Co-Authored-By: Claude Sonnet 4.6 --- .../elasticsearch/business/ContentletIndexAPIImpl.java | 6 +++++- .../java/com/dotcms/content/index/IndexConfigHelper.java | 2 +- dotCMS/src/main/java/com/dotmarketing/util/Logger.java | 8 ++++++++ .../business/ContentletIndexAPIImplMigrationIT.java | 8 +++++--- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index baadc0a67c32..85ecbfbafbb0 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -76,6 +76,7 @@ import com.liferay.portal.util.PortalUtil; import com.liferay.util.StringPool; import com.rainerhahnekamp.sneakythrow.Sneaky; +import javax.annotation.Nullable; import io.vavr.control.Try; import java.io.IOException; import java.sql.Connection; @@ -153,6 +154,7 @@ public class ContentletIndexAPIImpl implements ContentletIndexAPI { private static final String SELECT_CONTENTLET_VERSION_INFO = "select working_inode,live_inode from contentlet_version_info where identifier IN (%s)"; + @Nullable private final ReindexQueueAPI queueApi; private final IndexAPI indexAPI; private final IndiciesAPI legacyIndiciesAPI; @@ -619,7 +621,9 @@ private boolean createContentIndex(final String indexName, final int shards, Ind } final String physicalName = ops.toPhysicalName(indexName); final boolean contentIndex = ops.createContentIndex(physicalName, shards); - helper.addCustomMapping(List.of(indexName),tag); + if (contentIndex) { + helper.addCustomMapping(List.of(indexName), tag); + } return contentIndex; } diff --git a/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java b/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java index f2fa38f6e214..162887509824 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java @@ -59,7 +59,7 @@ static void logShadowWriteFailure(final Class clazz, .toUpperCase(); switch (level) { case "DEBUG": Logger.debug(clazz, message, t); break; - case "INFO": Logger.info(clazz, message); break; + case "INFO": Logger.info(clazz, message, t); break; case "ERROR": Logger.error(clazz, message, t); break; default: Logger.warn(clazz, message, t); break; } diff --git a/dotCMS/src/main/java/com/dotmarketing/util/Logger.java b/dotCMS/src/main/java/com/dotmarketing/util/Logger.java index acfa2c33ce9a..5e58e4ed0580 100644 --- a/dotCMS/src/main/java/com/dotmarketing/util/Logger.java +++ b/dotCMS/src/main/java/com/dotmarketing/util/Logger.java @@ -91,6 +91,14 @@ public static void info(Class cl, String message) { loadLogger(cl).info(message); } + public static void info(Class cl, String message, Throwable ex) { + if (isVelocityMessage(cl)) { + velocityInfo(cl, message); + return; + } + loadLogger(cl).info(message, ex); + } + public static void info(String cl, String message) { loadLogger(cl).info(message); } diff --git a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java index 7a5414147d8b..1c08cf15bf0b 100644 --- a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java +++ b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java @@ -141,9 +141,11 @@ public void tearDown() throws Exception { Config.setProperty(FLAG_KEY, null); cleanupTestIndices(); - Try.run(() -> APILocator.getIndiciesAPI().point(savedEsInfo)); + Try.run(() -> APILocator.getIndiciesAPI().point(savedEsInfo)) + .onFailure(e -> Logger.warn(this, "tearDown: failed to restore ES indices info: " + e.getMessage())); savedOsIndices.ifPresent(v -> - Try.run(() -> APILocator.getVersionedIndicesAPI().saveIndices(v))); + Try.run(() -> APILocator.getVersionedIndicesAPI().saveIndices(v)) + .onFailure(e -> Logger.warn(this, "tearDown: failed to restore OS versioned indices: " + e.getMessage()))); } // ========================================================================= @@ -645,7 +647,7 @@ private static boolean esSameAsOs() { "http://localhost:9207"); final String osEndpoint = Config.getStringProperty("OS_ENDPOINTS", "http://localhost:9201"); - return esEndpoint.equalsIgnoreCase(osEndpoint.trim()); + return esEndpoint.trim().equalsIgnoreCase(osEndpoint.trim()); } private static void setPhase(final int ordinal) { From 90b72a707744a3f28c5eb9d117ec896fe5233310 Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 20 Apr 2026 20:38:52 -0600 Subject: [PATCH 5/5] =?UTF-8?q?fix(opensearch):=20address=20PR=20#35389=20?= =?UTF-8?q?second=20review=20=E2=80=94=20primary/shadow=20separation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ContentletIndexAPIImpl.createContentIndex(String,int): replace the shared `result &=` accumulator with per-provider tracking so a shadow write failure in dual-write phases does not prevent addCustomMapping from running against the successfully-created primary index - PhaseRouter.writeBoolean: initialize primaryResult to false (safe default) instead of true — makes the silent invariant explicit: "assume failure until the primary provider confirms success" Co-Authored-By: Claude Sonnet 4.6 --- .../business/ContentletIndexAPIImpl.java | 20 ++++++++++++++----- .../com/dotcms/content/index/PhaseRouter.java | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 85ecbfbafbb0..72f2eb143254 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -586,20 +586,30 @@ public synchronized boolean createContentIndex(String indexName) @Override public synchronized boolean createContentIndex(final String indexName, final int shards) throws IOException { - boolean result = true; + // Track the primary provider's result independently so a shadow failure in dual-write + // phases (ES primary ok, OS shadow fails) does NOT prevent addCustomMapping from running + // against the successfully-created primary index. + final ContentletIndexOperations primary = router.readProvider(); + boolean primaryResult = false; for (final ContentletIndexOperations ops : router.writeProviders()) { final String physicalName = ops.toPhysicalName(indexName); try { - result &= ops.createContentIndex(physicalName, shards); + final boolean r = ops.createContentIndex(physicalName, shards); + if (ops == primary) { + primaryResult = r; + } } catch (Exception e) { Logger.error(this.getClass(), "Error while creating content index " + physicalName, e); - result = false; + if (ops == primary) { + primaryResult = false; + } + // shadow failures are fire-and-forget in dual-write phases } } - if (result) { + if (primaryResult) { MappingHelper.getInstance().addCustomMapping(indexName); } - return result; + return primaryResult; } diff --git a/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java b/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java index 25298ed286d6..03754b5d5a7a 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java @@ -235,7 +235,7 @@ public boolean writeBoolean(final Function fn) { } // Dual-write: call every provider; only primary result is returned final T primary = readProvider(); - boolean primaryResult = true; + boolean primaryResult = false; // safe default: assume failure until primary confirms success RuntimeException primaryEx = null; for (final T impl : providers) { try {