diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java index cb6276ed98b60..47ab43ff1e27c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java @@ -176,6 +176,14 @@ public void process(IndexRequest indexRequest) { } } + private static boolean shouldUseTimeBasedId(final IndexMode indexMode, final IndexVersion creationVersion) { + return indexMode == IndexMode.LOGSDB && isNewIndexVersion(creationVersion); + } + + private static boolean isNewIndexVersion(final IndexVersion creationVersion) { + return creationVersion.onOrAfter(IndexVersions.TIME_BASED_K_ORDERED_DOC_ID_BACKPORT); + } + @Override public int indexShard( String id, diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java index 9c97cb8fe7e85..1c8307dbddede 100644 --- a/server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java @@ -11,6 +11,7 @@ import java.nio.ByteBuffer; import java.util.Base64; +import java.util.function.Supplier; /** * Generates a base64-encoded, k-ordered UUID string optimized for compression and efficient indexing. @@ -28,18 +29,27 @@ * The result is a compact base64-encoded string, optimized for efficient compression of the _id field in an inverted index. */ public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator { - private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding(); + + private static final Base64.Encoder BASE_64_NO_PADDING_URL_ENCODER = Base64.getUrlEncoder().withoutPadding(); + + public TimeBasedKOrderedUUIDGenerator( + final Supplier timestampSupplier, + final Supplier sequenceIdSupplier, + final Supplier macAddressSupplier + ) { + super(timestampSupplier, sequenceIdSupplier, macAddressSupplier); + } @Override public String getBase64UUID() { - final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF; + final int sequenceId = sequenceNumber.incrementAndGet() & 0x00FF_FFFF; // Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts. // Uses AtomicLong to guarantee that timestamp increases even if the system clock moves backward. // If the sequenceId overflows (reaches 0 within the same millisecond), the timestamp is incremented // to ensure strict ordering. long timestamp = this.lastTimestamp.accumulateAndGet( - currentTimeMillis(), + timestampSupplier.get(), sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max ); @@ -68,6 +78,6 @@ public String getBase64UUID() { assert buffer.position() == uuidBytes.length; - return BASE_64_NO_PADDING.encodeToString(uuidBytes); + return BASE_64_NO_PADDING_URL_ENCODER.encodeToString(uuidBytes); } } diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java index 00d031a835ef1..36b8f3ddd3d33 100644 --- a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java @@ -12,6 +12,7 @@ import java.util.Base64; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; /** * These are essentially flake ids but we use 6 (not 8) bytes for timestamp, and use 3 (not 2) bytes for sequence number. We also reorder @@ -20,36 +21,43 @@ * For more information about flake ids, check out * https://archive.fo/2015.07.08-082503/http://www.boundary.com/blog/2012/01/flake-a-decentralized-k-ordered-unique-id-generator-in-erlang/ */ - class TimeBasedUUIDGenerator implements UUIDGenerator { // We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips // backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time: - protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt()); + protected final AtomicInteger sequenceNumber; + protected final AtomicLong lastTimestamp; - // Used to ensure clock moves forward: - protected final AtomicLong lastTimestamp = new AtomicLong(0); + protected final Supplier timestampSupplier; private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress(); static { assert SECURE_MUNGED_ADDRESS.length == 6; } - private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getUrlEncoder().withoutPadding(); // protected for testing - protected long currentTimeMillis() { - return System.currentTimeMillis(); + static final int SIZE_IN_BYTES = 15; + private final byte[] macAddress; + + TimeBasedUUIDGenerator( + final Supplier timestampSupplier, + final Supplier sequenceIdSupplier, + final Supplier macAddressSupplier + ) { + this.timestampSupplier = timestampSupplier; + // NOTE: getting the mac address every time using the supplier is expensive, hence we cache it. + this.macAddress = macAddressSupplier.get(); + this.sequenceNumber = new AtomicInteger(sequenceIdSupplier.get()); + // Used to ensure clock moves forward: + this.lastTimestamp = new AtomicLong(0); } - // protected for testing protected byte[] macAddress() { - return SECURE_MUNGED_ADDRESS; + return macAddress; } - static final int SIZE_IN_BYTES = 15; - @Override public String getBase64UUID() { final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff; @@ -58,7 +66,7 @@ public String getBase64UUID() { // still vulnerable if we are shut down, clock goes backwards, and we restart... for this we // randomize the sequenceNumber on init to decrease chance of collision: long timestamp = this.lastTimestamp.accumulateAndGet( - currentTimeMillis(), + timestampSupplier.get(), // Always force the clock to increment whenever sequence number is 0, in case we have a long // time-slip backwards: sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max diff --git a/server/src/main/java/org/elasticsearch/common/UUIDs.java b/server/src/main/java/org/elasticsearch/common/UUIDs.java index 0f73b8172c10f..ebcb375bc01bc 100644 --- a/server/src/main/java/org/elasticsearch/common/UUIDs.java +++ b/server/src/main/java/org/elasticsearch/common/UUIDs.java @@ -12,13 +12,29 @@ import org.elasticsearch.common.settings.SecureString; import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +/** + * Utility class for generating various types of UUIDs. + */ public class UUIDs { + private static final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt()); + public static final Supplier DEFAULT_TIMESTAMP_SUPPLIER = System::currentTimeMillis; + public static final Supplier DEFAULT_SEQUENCE_ID_SUPPLIER = sequenceNumber::incrementAndGet; + public static final Supplier DEFAULT_MAC_ADDRESS_SUPPLIER = MacAddressProvider::getSecureMungedAddress; + private static final UUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator(); + private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator( + DEFAULT_TIMESTAMP_SUPPLIER, + DEFAULT_SEQUENCE_ID_SUPPLIER, + DEFAULT_MAC_ADDRESS_SUPPLIER + ); - private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator(); - - private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator(); - private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(); + private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator( + DEFAULT_TIMESTAMP_SUPPLIER, + DEFAULT_SEQUENCE_ID_SUPPLIER, + DEFAULT_MAC_ADDRESS_SUPPLIER + ); /** * The length of a UUID string generated by {@link #base64UUID}. diff --git a/server/src/test/java/org/elasticsearch/common/TimeBasedUUIDGeneratorTests.java b/server/src/test/java/org/elasticsearch/common/TimeBasedUUIDGeneratorTests.java new file mode 100644 index 0000000000000..964683a1972ba --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/TimeBasedUUIDGeneratorTests.java @@ -0,0 +1,270 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.common; + +import org.elasticsearch.test.ESTestCase; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Base64; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +public class TimeBasedUUIDGeneratorTests extends ESTestCase { + + public void testTimeBasedUUIDGeneration() { + assertUUIDFormat(createGenerator(() -> Instant.now().toEpochMilli(), () -> 0, new TestRandomMacAddressSupplier()), 100_000); + } + + public void testTimeBasedUUIDUniqueness() { + assertUUIDUniqueness(createGenerator(() -> Instant.now().toEpochMilli(), () -> 0, new TestRandomMacAddressSupplier()), 100_000); + } + + public void testTimeBasedUUIDSequenceOverflow() { + // The assumption here is that our system will not generate more than 1000 UUIDs within the same millisecond. + // The sequence ID is set close to its max value (0x00FF_FFFF) to quickly trigger an overflow. + // However, since we are generating only 1000 UUIDs, the timestamp is expected to change at least once, + // ensuring uniqueness even if the sequence ID wraps around. + assertEquals( + 1000, + generateUUIDs( + createGenerator(() -> Instant.now().toEpochMilli(), () -> 0x00FF_FFFF - 10, new TestRandomMacAddressSupplier()), + 1000 + ).size() + ); + } + + public void testTimeBasedUUIDClockReset() { + // Simulate a clock that resets itself after reaching a threshold. + final Supplier unreliableClock = new TestClockResetTimestampSupplier( + Instant.now(), + 1, + 50, + ChronoUnit.MILLIS, + Instant.now().plus(100, ChronoUnit.MILLIS) + ); + final UUIDGenerator generator = createGenerator(unreliableClock, () -> 0, new TestRandomMacAddressSupplier()); + + final Set beforeReset = generateUUIDs(generator, 5_000); + final Set afterReset = generateUUIDs(generator, 5_000); + + // Ensure all UUIDs are unique, even after the clock resets. + assertEquals(5_000, beforeReset.size()); + assertEquals(5_000, afterReset.size()); + beforeReset.addAll(afterReset); + assertEquals(10_000, beforeReset.size()); + } + + public void testKOrderedUUIDGeneration() { + assertUUIDFormat(createKOrderedGenerator(() -> Instant.now().toEpochMilli(), () -> 0, new TestRandomMacAddressSupplier()), 100_000); + } + + public void testKOrderedUUIDUniqueness() { + assertUUIDUniqueness( + createKOrderedGenerator(() -> Instant.now().toEpochMilli(), () -> 0, new TestRandomMacAddressSupplier()), + 100_000 + ); + } + + public void testKOrderedUUIDSequenceOverflow() { + final UUIDGenerator generator = createKOrderedGenerator( + () -> Instant.now().toEpochMilli(), + () -> 0x00FF_FFFF - 10, + new TestRandomMacAddressSupplier() + ); + final Set uuids = generateUUIDs(generator, 1000); + + // The assumption here is that our system will not generate more than 1000 UUIDs within the same millisecond. + // The sequence ID is set close to its max value (0x00FF_FFFF) to quickly trigger an overflow. + // However, since we are generating only 1000 UUIDs, the timestamp is expected to change at least once, + // ensuring uniqueness even if the sequence ID wraps around. + assertEquals(1000, uuids.size()); + } + + public void testUUIDEncodingDecoding() { + testUUIDEncodingDecodingHelper( + Instant.parse("2024-11-13T10:12:43Z").toEpochMilli(), + 12345, + new TestRandomMacAddressSupplier().get() + ); + } + + public void testUUIDEncodingDecodingWithRandomValues() { + testUUIDEncodingDecodingHelper( + randomInstantBetween(Instant.now().minus(1, ChronoUnit.DAYS), Instant.now()).toEpochMilli(), + randomIntBetween(0, 0x00FF_FFFF), + new TestRandomMacAddressSupplier().get() + ); + } + + private void testUUIDEncodingDecodingHelper(final long timestamp, final int sequenceId, final byte[] macAddress) { + final TestTimeBasedKOrderedUUIDDecoder decoder = new TestTimeBasedKOrderedUUIDDecoder( + createKOrderedGenerator(() -> timestamp, () -> sequenceId, () -> macAddress).getBase64UUID() + ); + + // The sequence ID is incremented by 1 when generating the UUID. + assertEquals("Sequence ID does not match", sequenceId + 1, decoder.decodeSequenceId()); + // Truncate the timestamp to milliseconds to match the UUID generation granularity. + assertEquals( + "Timestamp does not match", + Instant.ofEpochMilli(timestamp).truncatedTo(ChronoUnit.MILLIS), + Instant.ofEpochMilli(decoder.decodeTimestamp()).truncatedTo(ChronoUnit.MILLIS) + ); + assertArrayEquals("MAC address does not match", macAddress, decoder.decodeMacAddress()); + } + + private void assertUUIDUniqueness(final UUIDGenerator generator, final int count) { + assertEquals(count, generateUUIDs(generator, count).size()); + } + + private Set generateUUIDs(final UUIDGenerator generator, final int count) { + return IntStream.range(0, count).mapToObj(i -> generator.getBase64UUID()).collect(HashSet::new, Set::add, Set::addAll); + } + + private void assertUUIDFormat(final UUIDGenerator generator, final int count) { + IntStream.range(0, count).forEach(i -> { + final String uuid = generator.getBase64UUID(); + assertNotNull(uuid); + assertEquals(20, uuid.length()); + assertFalse(uuid.contains("+")); + assertFalse(uuid.contains("/")); + assertFalse(uuid.contains("=")); + }); + } + + private UUIDGenerator createGenerator( + final Supplier timestampSupplier, + final Supplier sequenceIdSupplier, + final Supplier macAddressSupplier + ) { + return new TimeBasedUUIDGenerator(timestampSupplier, sequenceIdSupplier, macAddressSupplier); + } + + private UUIDGenerator createKOrderedGenerator( + final Supplier timestampSupplier, + final Supplier sequenceIdSupplier, + final Supplier macAddressSupplier + ) { + return new TimeBasedKOrderedUUIDGenerator(timestampSupplier, sequenceIdSupplier, macAddressSupplier); + } + + private static class TestRandomMacAddressSupplier implements Supplier { + private final byte[] macAddress = new byte[] { randomByte(), randomByte(), randomByte(), randomByte(), randomByte(), randomByte() }; + + @Override + public byte[] get() { + return macAddress; + } + } + + /** + * A {@link Supplier} implementation that simulates a clock that can move forward or backward in time. + * This supplier provides timestamps in milliseconds since the epoch, adjusting based on a given delta + * until a reset threshold is reached. After crossing the threshold, the timestamp moves backwards by a reset delta. + */ + private static class TestClockResetTimestampSupplier implements Supplier { + private Instant currentTime; + private final long delta; + private final long resetDelta; + private final ChronoUnit unit; + private final Instant resetThreshold; + + /** + * Constructs a new {@link TestClockResetTimestampSupplier}. + * + * @param startTime The initial starting time. + * @param delta The amount of time to add to the current time in each forward step. + * @param resetDelta The amount of time to subtract once the reset threshold is reached. + * @param unit The unit of time for both delta and resetDelta. + * @param resetThreshold The threshold after which the time is reset backwards. + */ + TestClockResetTimestampSupplier( + final Instant startTime, + final long delta, + final long resetDelta, + final ChronoUnit unit, + final Instant resetThreshold + ) { + this.currentTime = startTime; + this.delta = delta; + this.resetDelta = resetDelta; + this.unit = unit; + this.resetThreshold = resetThreshold; + } + + /** + * Provides the next timestamp in milliseconds since the epoch. + * If the current time is before the reset threshold, it advances the time by the delta. + * Otherwise, it subtracts the reset delta. + * + * @return The current time in milliseconds since the epoch. + */ + @Override + public Long get() { + if (currentTime.isBefore(resetThreshold)) { + currentTime = currentTime.plus(delta, unit); + } else { + currentTime = currentTime.minus(resetDelta, unit); + } + return currentTime.toEpochMilli(); + } + } + + /** + * A utility class to decode the K-ordered UUID extracting the original timestamp, MAC address and sequence ID. + */ + private static class TestTimeBasedKOrderedUUIDDecoder { + + private final byte[] decodedBytes; + + /** + * Constructs a new {@link TestTimeBasedKOrderedUUIDDecoder} using a base64-encoded UUID string. + * + * @param base64UUID The base64-encoded UUID string to decode. + */ + TestTimeBasedKOrderedUUIDDecoder(final String base64UUID) { + this.decodedBytes = Base64.getUrlDecoder().decode(base64UUID); + } + + /** + * Decodes the timestamp from the UUID using the following bytes: + * 0 (most significant), 1, 2, 3, 11, 13 (least significant). + * + * @return The decoded timestamp in milliseconds. + */ + public long decodeTimestamp() { + return ((long) (decodedBytes[0] & 0xFF) << 40) | ((long) (decodedBytes[1] & 0xFF) << 32) | ((long) (decodedBytes[2] & 0xFF) + << 24) | ((long) (decodedBytes[3] & 0xFF) << 16) | ((long) (decodedBytes[11] & 0xFF) << 8) | (decodedBytes[13] & 0xFF); + } + + /** + * Decodes the MAC address from the UUID using bytes 4 to 9. + * + * @return The decoded MAC address as a byte array. + */ + public byte[] decodeMacAddress() { + byte[] macAddress = new byte[6]; + System.arraycopy(decodedBytes, 4, macAddress, 0, 6); + return macAddress; + } + + /** + * Decodes the sequence ID from the UUID using bytes: + * 10 (most significant), 12 (middle), 14 (least significant). + * + * @return The decoded sequence ID. + */ + public int decodeSequenceId() { + return ((decodedBytes[10] & 0xFF) << 16) | ((decodedBytes[12] & 0xFF) << 8) | (decodedBytes[14] & 0xFF); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index 9fbeaf1c6c081..71c705f5df511 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -27,26 +27,37 @@ import org.elasticsearch.test.ESTestCase; import org.hamcrest.Matchers; +import java.util.Base64; import java.util.HashSet; import java.util.Random; import java.util.Set; +import java.util.function.Supplier; public class UUIDTests extends ESTestCase { - static UUIDGenerator timeUUIDGen = new TimeBasedUUIDGenerator(); + static final Base64.Decoder BASE_64_URL_DECODER = Base64.getUrlDecoder(); + static UUIDGenerator timeUUIDGen = new TimeBasedUUIDGenerator( + UUIDs.DEFAULT_TIMESTAMP_SUPPLIER, + UUIDs.DEFAULT_SEQUENCE_ID_SUPPLIER, + UUIDs.DEFAULT_MAC_ADDRESS_SUPPLIER + ); static UUIDGenerator randomUUIDGen = new RandomBasedUUIDGenerator(); - static UUIDGenerator kOrderedUUIDGen = new TimeBasedKOrderedUUIDGenerator(); + static UUIDGenerator kOrderedUUIDGen = new TimeBasedKOrderedUUIDGenerator( + UUIDs.DEFAULT_TIMESTAMP_SUPPLIER, + UUIDs.DEFAULT_SEQUENCE_ID_SUPPLIER, + UUIDs.DEFAULT_MAC_ADDRESS_SUPPLIER + ); public void testRandomUUID() { - verifyUUIDSet(100000, randomUUIDGen); + verifyUUIDSet(100000, randomUUIDGen).forEach(this::verifyUUIDIsUrlSafe); } public void testTimeUUID() { - verifyUUIDSet(100000, timeUUIDGen); + verifyUUIDSet(100000, timeUUIDGen).forEach(this::verifyUUIDIsUrlSafe); } public void testKOrderedUUID() { - verifyUUIDSet(100000, kOrderedUUIDGen); + verifyUUIDSet(100000, kOrderedUUIDGen).forEach(this::verifyUUIDIsUrlSafe); } public void testThreadedRandomUUID() { @@ -143,6 +154,7 @@ public void testUUIDThreaded(UUIDGenerator uuidSource) { globalSet.addAll(runner.uuidSet); } assertEquals(count * uuids, globalSet.size()); + globalSet.forEach(this::verifyUUIDIsUrlSafe); } private static double testCompression(final UUIDGenerator generator, int numDocs, int numDocsPerSecond, int numNodes, Logger logger) @@ -158,35 +170,25 @@ private static double testCompression(final UUIDGenerator generator, int numDocs UUIDGenerator uuidSource = generator; if (generator instanceof TimeBasedUUIDGenerator) { if (generator instanceof TimeBasedKOrderedUUIDGenerator) { - uuidSource = new TimeBasedKOrderedUUIDGenerator() { + uuidSource = new TimeBasedKOrderedUUIDGenerator(new Supplier<>() { double currentTimeMillis = TestUtil.nextLong(random(), 0L, 10000000000L); @Override - protected long currentTimeMillis() { + public Long get() { currentTimeMillis += intervalBetweenDocs * 2 * r.nextDouble(); return (long) currentTimeMillis; } - - @Override - protected byte[] macAddress() { - return RandomPicks.randomFrom(r, macAddresses); - } - }; + }, () -> 0, () -> RandomPicks.randomFrom(r, macAddresses)); } else { - uuidSource = new TimeBasedUUIDGenerator() { + uuidSource = new TimeBasedUUIDGenerator(new Supplier<>() { double currentTimeMillis = TestUtil.nextLong(random(), 0L, 10000000000L); @Override - protected long currentTimeMillis() { + public Long get() { currentTimeMillis += intervalBetweenDocs * 2 * r.nextDouble(); return (long) currentTimeMillis; } - - @Override - protected byte[] macAddress() { - return RandomPicks.randomFrom(r, macAddresses); - } - }; + }, () -> 0, () -> RandomPicks.randomFrom(r, macAddresses)); } } @@ -237,4 +239,13 @@ public void testStringLength() { private static int getUnpaddedBase64StringLength(int sizeInBytes) { return (int) Math.ceil(sizeInBytes * 4.0 / 3.0); } + + private void verifyUUIDIsUrlSafe(final String uuid) { + assertFalse("UUID should not contain padding characters: " + uuid, uuid.contains("=")); + try { + BASE_64_URL_DECODER.decode(uuid); + } catch (IllegalArgumentException e) { + throw new AssertionError("UUID is not a valid Base64 URL-safe encoded string: " + uuid); + } + } }