diff --git a/modules/core/src/main/java/org/apache/ignite/internal/managers/communication/GridIoMessageFactory.java b/modules/core/src/main/java/org/apache/ignite/internal/managers/communication/GridIoMessageFactory.java index d8d62d4595a96..b5ae4f63135e4 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/managers/communication/GridIoMessageFactory.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/managers/communication/GridIoMessageFactory.java @@ -53,6 +53,7 @@ import org.apache.ignite.internal.processors.cache.GridCacheReturn; import org.apache.ignite.internal.processors.cache.GridChangeGlobalStateMessageResponse; import org.apache.ignite.internal.processors.cache.KeyCacheObjectImpl; +import org.apache.ignite.internal.processors.cache.TombstoneCacheObject; import org.apache.ignite.internal.processors.cache.WalStateAckMessage; import org.apache.ignite.internal.processors.cache.binary.MetadataRequestMessage; import org.apache.ignite.internal.processors.cache.binary.MetadataResponseMessage; @@ -1166,6 +1167,11 @@ public GridIoMessageFactory(MessageFactory[] ext) { break; + case 176: + msg = TombstoneCacheObject.INSTANCE; + + break; + // [-3..119] [124..129] [-23..-28] [-36..-55] - this // [120..123] - DR // [-4..-22, -30..-35] - SQL diff --git a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageUtils.java index 217164cf39790..0b9b1b47029d4 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageUtils.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageUtils.java @@ -56,8 +56,8 @@ public static int getUnsignedByte(long addr, int off) { */ public static byte[] getBytes(long addr, int off, int len) { assert addr > 0 : addr; - assert off >= 0; - assert len >= 0; + assert off >= 0 : off; + assert len >= 0 : len; byte[] bytes = new byte[len]; diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/WALRecord.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/WALRecord.java index 269b28434816a..405e6305ba8ab 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/WALRecord.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/WALRecord.java @@ -220,8 +220,11 @@ public enum RecordType { /** Rollback tx record. */ ROLLBACK_TX_RECORD (57, LOGICAL), - /** */ - PARTITION_META_PAGE_UPDATE_COUNTERS_V2 (58, PHYSICAL); + /** Partition meta page containing update counter gaps. */ + PARTITION_META_PAGE_UPDATE_COUNTERS_V2 (58, PHYSICAL), + + /** Partition meta page containing tombstones count. */ + PARTITION_META_PAGE_UPDATE_COUNTERS_V3 (59, PHYSICAL); /** Index for serialization. Should be consistent throughout all versions. */ private final int idx; diff --git a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecord.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecord.java index 3e2b67bd3c538..0c9f7fcd76e0c 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecord.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecord.java @@ -46,7 +46,7 @@ public class MetaPageUpdatePartitionDataRecord extends PageDeltaRecord { private int allocatedIdxCandidate; /** */ - private long cntrsPageId; + private long cacheSizesPageId; /** * @param grpId Cache group ID. 
@@ -59,9 +59,10 @@ public MetaPageUpdatePartitionDataRecord( long updateCntr, long globalRmvId, int partSize, - long cntrsPageId, + long cacheSizesPageId, byte state, - int allocatedIdxCandidate) { + int allocatedIdxCandidate + ) { super(grpId, pageId); this.updateCntr = updateCntr; @@ -69,7 +70,7 @@ public MetaPageUpdatePartitionDataRecord( this.partSize = partSize; this.state = state; this.allocatedIdxCandidate = allocatedIdxCandidate; - this.cntrsPageId = cntrsPageId; + this.cacheSizesPageId = cacheSizesPageId; } /** @@ -81,7 +82,7 @@ public MetaPageUpdatePartitionDataRecord(DataInput in) throws IOException{ this.updateCntr = in.readLong(); this.globalRmvId = in.readLong(); this.partSize = in.readInt(); - this.cntrsPageId = in.readLong(); + this.cacheSizesPageId = in.readLong(); this.state = in.readByte(); this.allocatedIdxCandidate = in.readInt(); } @@ -110,8 +111,8 @@ public int partitionSize() { /** * @return Partition size. */ - public long countersPageId() { - return cntrsPageId; + public long cacheSizesPageId() { + return cacheSizesPageId; } /** @@ -128,7 +129,7 @@ public byte state() { io.setUpdateCounter(pageAddr, updateCntr); io.setGlobalRemoveId(pageAddr, globalRmvId); io.setSize(pageAddr, partSize); - io.setCountersPageId(pageAddr, cntrsPageId); + io.setSizesPageId(pageAddr, cacheSizesPageId); io.setPartitionState(pageAddr, state); io.setCandidatePageCount(pageAddr, allocatedIdxCandidate); } @@ -150,7 +151,7 @@ public void toBytes(ByteBuffer buf) { buf.putLong(updateCounter()); buf.putLong(globalRemoveId()); buf.putInt(partitionSize()); - buf.putLong(countersPageId()); + buf.putLong(cacheSizesPageId()); buf.put(state()); buf.putInt(allocatedIndexCandidate()); } diff --git a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV2.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV2.java index ab3ccf8f35a37..a8a859764531e 
100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV2.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV2.java @@ -28,11 +28,12 @@ import org.apache.ignite.internal.util.typedef.internal.S; /** - * + * Partition meta page delta record. + * Contains reference to update counters gaps. */ public class MetaPageUpdatePartitionDataRecordV2 extends MetaPageUpdatePartitionDataRecord { /** */ - private long link; + private long gapsLink; /** * @param grpId Group id. @@ -43,7 +44,7 @@ public class MetaPageUpdatePartitionDataRecordV2 extends MetaPageUpdatePartition * @param cntrsPageId Cntrs page id. * @param state State. * @param allocatedIdxCandidate Allocated index candidate. - * @param link Link. + * @param gapsLink Link. */ public MetaPageUpdatePartitionDataRecordV2( int grpId, @@ -54,9 +55,10 @@ public MetaPageUpdatePartitionDataRecordV2( long cntrsPageId, byte state, int allocatedIdxCandidate, - long link) { + long gapsLink + ) { super(grpId, pageId, updateCntr, globalRmvId, partSize, cntrsPageId, state, allocatedIdxCandidate); - this.link = link; + this.gapsLink = gapsLink; } /** @@ -65,7 +67,7 @@ public MetaPageUpdatePartitionDataRecordV2( public MetaPageUpdatePartitionDataRecordV2(DataInput in) throws IOException { super(in); - this.link = in.readLong(); + this.gapsLink = in.readLong(); } /** {@inheritDoc} */ @@ -74,21 +76,21 @@ public MetaPageUpdatePartitionDataRecordV2(DataInput in) throws IOException { PagePartitionMetaIOV2 io = (PagePartitionMetaIOV2)PagePartitionMetaIO.VERSIONS.forPage(pageAddr); - io.setGapsLink(pageAddr, link); + io.setGapsLink(pageAddr, gapsLink); } /** * */ - public long link() { - return link; + public long gapsLink() { + return gapsLink; } /** {@inheritDoc} */ @Override public void toBytes(ByteBuffer buf) { super.toBytes(buf); - buf.putLong(link()); + buf.putLong(gapsLink()); } /** 
{@inheritDoc} */ diff --git a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV3.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV3.java new file mode 100644 index 0000000000000..1263c43db32fd --- /dev/null +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/wal/record/delta/MetaPageUpdatePartitionDataRecordV3.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.pagemem.wal.record.delta; + +import java.io.DataInput; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.internal.pagemem.PageIdUtils; +import org.apache.ignite.internal.pagemem.PageMemory; +import org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIO; +import org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIOV2; +import org.apache.ignite.internal.util.typedef.internal.S; + +/** + * Partition meta page delta record. + * Contains information about tombstones count. 
+ */ +public class MetaPageUpdatePartitionDataRecordV3 extends MetaPageUpdatePartitionDataRecordV2 { + /** Tombstones count. */ + private long tombstonesCnt; + + /** + * @param grpId Group id. + * @param pageId Page id. + * @param updateCntr Update counter. + * @param globalRmvId Global remove id. + * @param partSize Partition size. + * @param cacheSizesPageId Cache sizes page id. + * @param state State. + * @param allocatedIdxCandidate Allocated index candidate. + * @param gapsLink Gaps link. + * @param tombstonesCnt Tombstones count. + */ + public MetaPageUpdatePartitionDataRecordV3( + int grpId, + long pageId, + long updateCntr, + long globalRmvId, + int partSize, + long cacheSizesPageId, + byte state, + int allocatedIdxCandidate, + long gapsLink, + long tombstonesCnt + ) { + super(grpId, pageId, updateCntr, globalRmvId, partSize, cacheSizesPageId, state, allocatedIdxCandidate, gapsLink); + this.tombstonesCnt = tombstonesCnt; + } + + /** + * @param in In. + */ + public MetaPageUpdatePartitionDataRecordV3(DataInput in) throws IOException { + super(in); + + this.tombstonesCnt = in.readLong(); + } + + /** + * @return Tombstones count. 
+ */ + public long tombstonesCount() { + return tombstonesCnt; + } + + /** {@inheritDoc} */ + @Override public void applyDelta(PageMemory pageMem, long pageAddr) throws IgniteCheckedException { + super.applyDelta(pageMem, pageAddr); + + PagePartitionMetaIOV2 io = (PagePartitionMetaIOV2) PagePartitionMetaIO.VERSIONS.forPage(pageAddr); + + io.setTombstonesCount(pageAddr, tombstonesCnt); + } + + /** {@inheritDoc} */ + @Override public void toBytes(ByteBuffer buf) { + super.toBytes(buf); + + buf.putLong(tombstonesCnt); + } + + /** {@inheritDoc} */ + @Override public RecordType type() { + return RecordType.PARTITION_META_PAGE_UPDATE_COUNTERS_V3; + } + + /** {@inheritDoc} */ + @Override public String toString() { + return S.toString(MetaPageUpdatePartitionDataRecordV3.class, this, "partId", PageIdUtils.partId(pageId()), + "super", super.toString()); + } +} diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupContext.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupContext.java index ad3557087a237..e753edb381a90 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupContext.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupContext.java @@ -46,6 +46,8 @@ import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtAffinityAssignmentRequest; import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtAffinityAssignmentResponse; import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState; import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology; import 
org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl; import org.apache.ignite.internal.processors.cache.persistence.DataRegion; @@ -261,6 +263,8 @@ public class CacheGroupContext { statHolderIdx = new IoStatisticsHolderIndex(HASH_INDEX, cacheOrGroupName(), HASH_PK_IDX_NAME, mmgr); statHolderData = new IoStatisticsHolderCache(cacheOrGroupName(), grpId, mmgr); } + + hasAtomicCaches = ccfg.getAtomicityMode() == ATOMIC; } /** @@ -1298,6 +1302,21 @@ public boolean hasAtomicCaches() { return hasAtomicCaches; } + /** + * @return {@code True} if need create temporary tombstones entries for removed data. + */ + public boolean supportsTombstone() { + return !mvccEnabled && !isLocal(); + } + + /** + * @param part Partition. + * @return {@code True} if need create tombstone for remove in given partition. + */ + public boolean shouldCreateTombstone(@Nullable GridDhtLocalPartition part) { + return part != null && supportsTombstone() && part.state() == GridDhtPartitionState.MOVING; + } + /** * @return Metrics. */ diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java index e82e451ab4aa0..fab2e1fd96181 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java @@ -70,6 +70,7 @@ public class CacheGroupMetricsImpl { private final LongMetric sparseStorageSize; /** Interface describing a predicate of two integers. */ + @FunctionalInterface private interface IntBiPredicate { /** * Predicate body. 
@@ -169,6 +170,10 @@ public void onTopologyInitialized() { mreg.register("TotalAllocatedSize", this::getTotalAllocatedSize, "Total size of memory allocated for group, in bytes."); + + mreg.register("Tombstones", + this::getTombstones, + "Number of tombstone entries."); } /** */ @@ -253,20 +258,12 @@ private int numberOfPartitionCopies(IntBiPredicate pred) { /** */ public int getMinimumNumberOfPartitionCopies() { - return numberOfPartitionCopies(new IntBiPredicate() { - @Override public boolean apply(int targetVal, int nextVal) { - return nextVal < targetVal; - } - }); + return numberOfPartitionCopies((targetVal, nextVal) -> nextVal < targetVal); } /** */ public int getMaximumNumberOfPartitionCopies() { - return numberOfPartitionCopies(new IntBiPredicate() { - @Override public boolean apply(int targetVal, int nextVal) { - return nextVal > targetVal; - } - }); + return numberOfPartitionCopies((targetVal, nextVal) -> nextVal > targetVal); } /** @@ -462,6 +459,12 @@ public long getSparseStorageSize() { return sparseStorageSize == null ? 0 : sparseStorageSize.value(); } + /** */ + public long getTombstones() { + return ctx.topology().localPartitions().stream() + .map(part -> part.dataStore().tombstonesCount()).reduce(Long::sum).orElse(0L); + } + /** Removes all metric for cache group. 
*/ public void remove() { ctx.shared().kernalContext().metric().remove(metricGroupName()); diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheObject.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheObject.java index f9f384a7f9702..5e89926f621fc 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheObject.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheObject.java @@ -38,6 +38,9 @@ public interface CacheObject extends Message { /** */ public static final byte TYPE_BINARY_ENUM = 101; + /** */ + public static final byte TOMBSTONE = -1; + /** * @param ctx Context. * @param cpy If {@code true} need to copy value. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheContext.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheContext.java index 9ddafb2a90b31..431118cf36e8a 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheContext.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheContext.java @@ -625,8 +625,12 @@ public byte ioPolicy() { public void cache(GridCacheAdapter cache) { this.cache = cache; - deferredDel = cache.isDht() || cache.isDhtAtomic() || cache.isColocated() || - (cache.isNear() && cache.configuration().getAtomicityMode() == ATOMIC); + if (grp.supportsTombstone() && cache.configuration().getAtomicityMode() == TRANSACTIONAL) + deferredDel = false; + else { + deferredDel = (cache.isDht() || cache.isDhtAtomic() || cache.isColocated() || + (cache.isNear() && cache.configuration().getAtomicityMode() == ATOMIC)); + } } /** diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java index f9388d24cc2cc..6936b980434bb 100644 
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java @@ -98,9 +98,9 @@ import org.apache.ignite.internal.util.typedef.internal.CU; import org.apache.ignite.internal.util.typedef.internal.S; import org.apache.ignite.internal.util.typedef.internal.U; +import org.apache.ignite.lang.IgniteBiPredicate; import org.apache.ignite.lang.IgniteBiTuple; import org.apache.ignite.lang.IgniteInClosure; -import org.apache.ignite.lang.IgnitePredicate; import org.apache.ignite.lang.IgniteUuid; import org.apache.ignite.thread.IgniteThread; import org.jetbrains.annotations.NotNull; @@ -1713,14 +1713,19 @@ protected Object keyValue(boolean cpy) { interceptRes = cctx.config().getInterceptor().onBeforeRemove(entry0); - if (cctx.cancelRemove(interceptRes)) { - CacheObject ret = cctx.toCacheObject(cctx.unwrapTemporary(interceptRes.get2())); - + if (cctx.cancelRemove(interceptRes)) return new GridCacheUpdateTxResult(false, logPtr); - } } - removeValue(); + if (cctx.group().shouldCreateTombstone(localPartition())) { + cctx.offheap().removeWithTombstone(cctx, key, newVer, localPartition()); + + // Partition may change its state during remove. + if (!cctx.group().shouldCreateTombstone(localPartition())) + removeTombstone0(newVer); + } + else + removeValue(); update(null, 0, 0, newVer, true); @@ -2817,6 +2822,34 @@ protected void clearReader(UUID nodeId) throws GridCacheEntryRemovedException { return marked; } + /** + * @param tombstoneVer Tombstone version. + * @throws GridCacheEntryRemovedException If entry was removed. + * @throws IgniteCheckedException If failed. 
+ */ + public void removeTombstone(GridCacheVersion tombstoneVer) throws GridCacheEntryRemovedException, IgniteCheckedException { + lockEntry(); + + try { + checkObsolete(); + + removeTombstone0(tombstoneVer); + } + finally { + unlockEntry(); + } + } + + /** + * @param tombstoneVer Tombstone version. + * @throws IgniteCheckedException If failed. + */ + private void removeTombstone0(GridCacheVersion tombstoneVer) throws IgniteCheckedException { + RemoveClosure c = new RemoveClosure(this, tombstoneVer); + + cctx.offheap().invoke(cctx, key, localPartition(), c); + } + /** * @return {@code True} if this entry should not be evicted from cache. */ @@ -3337,20 +3370,18 @@ private boolean skipInterceptor(@Nullable GridCacheVersion explicitVer) { boolean update; - IgnitePredicate p = new IgnitePredicate() { - @Override public boolean apply(@Nullable CacheDataRow row) { + IgniteBiPredicate p = new IgniteBiPredicate() { + @Override public boolean apply(@Nullable CacheObject val, GridCacheVersion currVer) { boolean update0; - GridCacheVersion currentVer = row != null ? row.version() : GridCacheMapEntry.this.ver; - - boolean isStartVer = cctx.shared().versions().isStartVersion(currentVer); + boolean isStartVer = cctx.shared().versions().isStartVersion(currVer); if (cctx.group().persistenceEnabled()) { if (!isStartVer) { if (cctx.atomic()) - update0 = ATOMIC_VER_COMPARATOR.compare(currentVer, ver) < 0; + update0 = ATOMIC_VER_COMPARATOR.compare(currVer, ver) < 0; else - update0 = currentVer.compareTo(ver) < 0; + update0 = currVer.compareTo(ver) < 0; } else update0 = true; @@ -3358,14 +3389,15 @@ private boolean skipInterceptor(@Nullable GridCacheVersion explicitVer) { else update0 = isStartVer; - update0 |= (!preload && deletedUnlocked()); + // Such combination may exist during datastreamer first update. 
+ update0 |= (!preload && val == null); return update0; } }; if (unswapped) { - update = p.apply(null); + update = p.apply(this.val, this.ver); if (update) { // If entry is already unswapped and we are modifying it, we must run deletion callbacks for old value. @@ -3396,7 +3428,7 @@ else if (val == null) // cannot identify whether the entry is exist on the fly unswap(false); - if (update = p.apply(null)) { + if (update = p.apply(this.val, this.ver)) { // If entry is already unswapped and we are modifying it, we must run deletion callbacks for old value. long oldExpTime = expireTimeUnlocked(); long delta = (oldExpTime == 0 ? 0 : oldExpTime - U.currentTimeMillis()); @@ -4256,9 +4288,11 @@ private IgniteTxLocalAdapter currentTx() { * @param ver New entry version. * @throws IgniteCheckedException If update failed. */ - protected boolean storeValue(@Nullable CacheObject val, + protected boolean storeValue( + @Nullable CacheObject val, long expireTime, - GridCacheVersion ver) throws IgniteCheckedException { + GridCacheVersion ver + ) throws IgniteCheckedException { return storeValue(val, expireTime, ver, null, null); } @@ -4268,26 +4302,26 @@ protected boolean storeValue(@Nullable CacheObject val, * @param val Value. * @param expireTime Expire time. * @param ver New entry version. - * @param predicate Optional predicate. + * @param p Optional predicate. * @param row Pre-created data row, associated with this cache entry. * @return {@code True} if storage was modified. * @throws IgniteCheckedException If update failed. 
*/ - protected boolean storeValue( + private boolean storeValue( @Nullable CacheObject val, long expireTime, GridCacheVersion ver, - @Nullable IgnitePredicate predicate, + @Nullable IgniteBiPredicate p, @Nullable CacheDataRow row ) throws IgniteCheckedException { assert lock.isHeldByCurrentThread(); assert localPartition() == null || localPartition().state() != RENTING : localPartition(); - UpdateClosure closure = new UpdateClosure(this, val, ver, expireTime, predicate, row); + UpdateClosure c = new UpdateClosure(this, val, ver, expireTime, p, row); - cctx.offheap().invoke(cctx, key, localPartition(), closure); + cctx.offheap().invoke(cctx, key, localPartition(), c); - return closure.treeOp != IgniteTree.OperationType.NOOP; + return c.treeOp != IgniteTree.OperationType.NOOP; } /** @@ -4480,6 +4514,9 @@ protected void removeValue() throws IgniteCheckedException { CacheDataRow row = cctx.offheap().read(this); + if (cctx.offheap().isTombstone(row)) + return; + if (row != null && (filter == null || filter.apply(row))) clo.apply(row); } @@ -5700,6 +5737,101 @@ private LazyValueEntry(KeyCacheObject key, boolean keepBinary) { } } + /** + * @param row Data row. + * @return {@code True} if row expired. + * @throws IgniteCheckedException If failed. 
+ */ + private boolean checkRowExpired(CacheDataRow row) throws IgniteCheckedException { + assert row != null; + + if (!(row.expireTime() > 0 && row.expireTime() <= U.currentTimeMillis())) + return false; + + CacheObject expiredVal = row.value(); + + if (cctx.deferredDelete() && !detached() && !isInternal()) { + update(null, CU.TTL_ETERNAL, CU.EXPIRE_TIME_ETERNAL, ver, true); + + if (!deletedUnlocked()) + deletedUnlocked(true); + } + else + markObsolete0(cctx.versions().next(), true, null); + + if (cctx.events().isRecordable(EVT_CACHE_OBJECT_EXPIRED)) { + cctx.events().addEvent(partition(), + key(), + cctx.localNodeId(), + null, + EVT_CACHE_OBJECT_EXPIRED, + null, + false, + expiredVal, + expiredVal != null, + null, + null, + null, + true); + } + + cctx.continuousQueries().onEntryExpired(this, key(), expiredVal); + + return true; + } + + /** + * + */ + private static class RemoveClosure implements IgniteCacheOffheapManager.OffheapInvokeClosure { + /** */ + private final GridCacheMapEntry entry; + + /** */ + private final GridCacheVersion ver; + + /** */ + private IgniteTree.OperationType op; + + /** */ + private CacheDataRow oldRow; + + public RemoveClosure(GridCacheMapEntry entry, GridCacheVersion ver) { + this.entry = entry; + this.ver = ver; + } + + /** {@inheritDoc} */ + @Override public @Nullable CacheDataRow oldRow() { + return oldRow; + } + + /** {@inheritDoc} */ + @Override public void call(@Nullable CacheDataRow row) throws IgniteCheckedException { + if (row == null || !ver.equals(row.version())) { + op = IgniteTree.OperationType.NOOP; + + return; + } + + row.key(entry.key); + + oldRow = row; + + op = IgniteTree.OperationType.REMOVE; + } + + /** {@inheritDoc} */ + @Override public CacheDataRow newRow() { + return null; + } + + /** {@inheritDoc} */ + @Override public IgniteTree.OperationType operationType() { + return op; + } + } + /** * */ @@ -5717,7 +5849,7 @@ private static class UpdateClosure implements IgniteCacheOffheapManager.OffheapI private final 
long expireTime; /** */ - @Nullable private final IgnitePredicate predicate; + @Nullable private final IgniteBiPredicate p; /** */ private CacheDataRow newRow; @@ -5733,32 +5865,48 @@ private static class UpdateClosure implements IgniteCacheOffheapManager.OffheapI * @param val New value. * @param ver New version. * @param expireTime New expire time. - * @param predicate Optional predicate. + * @param p Optional predicate. + * @param newRow New row value. */ - UpdateClosure(GridCacheMapEntry entry, @Nullable CacheObject val, GridCacheVersion ver, long expireTime, - @Nullable IgnitePredicate predicate, @Nullable CacheDataRow newRow) { + private UpdateClosure( + GridCacheMapEntry entry, + @Nullable CacheObject val, + GridCacheVersion ver, + long expireTime, + @Nullable IgniteBiPredicate p, + @Nullable CacheDataRow newRow + ) { this.entry = entry; this.val = val; this.ver = ver; this.expireTime = expireTime; - this.predicate = predicate; + this.p = p; this.newRow = newRow; } /** {@inheritDoc} */ @Override public void call(@Nullable CacheDataRow oldRow) throws IgniteCheckedException { - if (oldRow != null) { + if (oldRow != null) oldRow.key(entry.key); - oldRow = checkRowExpired(oldRow); - } - this.oldRow = oldRow; - if (predicate != null && !predicate.apply(oldRow)) { - treeOp = IgniteTree.OperationType.NOOP; + if (p != null) { + CacheObject val = null; + GridCacheVersion ver = entry.ver; - return; + if (oldRow != null) { + if (!entry.checkRowExpired(oldRow) && !entry.context().offheap().isTombstone(oldRow)) + val = oldRow.value(); + + ver = oldRow.version(); + } + + if (!p.apply(val, ver)) { + treeOp = IgniteTree.OperationType.NOOP; + + return; + } } if (val != null) { @@ -5769,7 +5917,8 @@ private static class UpdateClosure implements IgniteCacheOffheapManager.OffheapI val, ver, expireTime, - oldRow); + oldRow + ); } treeOp = oldRow != null && oldRow.link() == newRow.link() ? 
@@ -5793,53 +5942,6 @@ private static class UpdateClosure implements IgniteCacheOffheapManager.OffheapI @Nullable @Override public CacheDataRow oldRow() { return oldRow; } - - /** - * Checks row for expiration and fire expire events if needed. - * - * @param row old row. - * @return {@code Null} if row was expired, row itself otherwise. - * @throws IgniteCheckedException - */ - private CacheDataRow checkRowExpired(CacheDataRow row) throws IgniteCheckedException { - assert row != null; - - if (!(row.expireTime() > 0 && row.expireTime() <= U.currentTimeMillis())) - return row; - - GridCacheContext cctx = entry.context(); - - CacheObject expiredVal = row.value(); - - if (cctx.deferredDelete() && !entry.detached() && !entry.isInternal()) { - entry.update(null, CU.TTL_ETERNAL, CU.EXPIRE_TIME_ETERNAL, entry.ver, true); - - if (!entry.deletedUnlocked() && !entry.isStartVersion()) - entry.deletedUnlocked(true); - } - else - entry.markObsolete0(cctx.versions().next(), true, null); - - if (cctx.events().isRecordable(EVT_CACHE_OBJECT_EXPIRED)) { - cctx.events().addEvent(entry.partition(), - entry.key(), - cctx.localNodeId(), - null, - EVT_CACHE_OBJECT_EXPIRED, - null, - false, - expiredVal, - expiredVal != null, - null, - null, - null, - true); - } - - cctx.continuousQueries().onEntryExpired(entry, entry.key(), expiredVal); - - return null; - } } /** @@ -6014,7 +6116,7 @@ private static class AtomicCacheUpdateClosure implements IgniteCacheOffheapManag // unswap entry.update(oldRow.value(), oldRow.expireTime(), 0, oldRow.version(), false); - if (checkRowExpired(oldRow)) { + if (entry.checkRowExpired(oldRow)) { oldRowExpiredFlag = true; oldRow = null; @@ -6165,53 +6267,6 @@ else if ((invokeRes == null || invokeRes.getValue() == null) && writeObj != null assert updateRes != null && treeOp != null; } - /** - * Check row expiration and fire expire events if needed. - * - * @param row Old row. - * @return {@code True} if row was expired, {@code False} otherwise. 
- * @throws IgniteCheckedException if failed. - */ - private boolean checkRowExpired(CacheDataRow row) throws IgniteCheckedException { - assert row != null; - - if (!(row.expireTime() > 0 && row.expireTime() <= U.currentTimeMillis())) - return false; - - GridCacheContext cctx = entry.context(); - - CacheObject expiredVal = row.value(); - - if (cctx.deferredDelete() && !entry.detached() && !entry.isInternal()) { - entry.update(null, CU.TTL_ETERNAL, CU.EXPIRE_TIME_ETERNAL, entry.ver, true); - - if (!entry.deletedUnlocked()) - entry.deletedUnlocked(true); - } - else - entry.markObsolete0(cctx.versions().next(), true, null); - - if (cctx.events().isRecordable(EVT_CACHE_OBJECT_EXPIRED)) { - cctx.events().addEvent(entry.partition(), - entry.key(), - cctx.localNodeId(), - null, - EVT_CACHE_OBJECT_EXPIRED, - null, - false, - expiredVal, - expiredVal != null, - null, - null, - null, - true); - } - - cctx.continuousQueries().onEntryExpired(entry, entry.key(), expiredVal); - - return true; - } - /** * @param storeLoadedVal Value loaded from store. * @param updateExpireTime {@code True} if need update expire time. 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java index e73ad52400451..c5da2b4271d87 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java @@ -30,6 +30,7 @@ import org.apache.ignite.internal.processors.cache.mvcc.MvccSnapshot; import org.apache.ignite.internal.processors.cache.mvcc.MvccVersion; import org.apache.ignite.internal.processors.cache.persistence.CacheDataRow; +import org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter; import org.apache.ignite.internal.processors.cache.persistence.CacheSearchRow; import org.apache.ignite.internal.processors.cache.persistence.DataRowCacheAware; import org.apache.ignite.internal.processors.cache.persistence.RootPage; @@ -161,11 +162,6 @@ public interface IgniteCacheOffheapManager { */ public void destroyCacheDataStore(CacheDataStore store) throws IgniteCheckedException; - /** - * TODO: GG-10884, used on only from initialValue. - */ - public boolean containsKey(GridCacheMapEntry entry); - /** * @param cctx Cache context. * @param c Closure. @@ -224,7 +220,7 @@ public List> mvccAllVersions(GridCacheContext * @return Iterator over all versions. * @throws IgniteCheckedException If failed. */ - GridCursor mvccAllVersionsCursor(GridCacheContext cctx, KeyCacheObject key, Object x) + GridCursor mvccAllVersionsCursor(GridCacheContext cctx, KeyCacheObject key, CacheDataRowAdapter.RowData x) throws IgniteCheckedException; /** @@ -404,6 +400,27 @@ public void remove( GridDhtLocalPartition part ) throws IgniteCheckedException; + /** + * @param cctx Cache context. + * @param key Key. + * @param ver Version. + * @param part Partition. + * @throws IgniteCheckedException If failed. 
+ */ + public void removeWithTombstone( + GridCacheContext cctx, + KeyCacheObject key, + GridCacheVersion ver, + GridDhtLocalPartition part + ) throws IgniteCheckedException; + + /** + * @param row Data row. + * @return {@code True} if give row is tombstone. + * @throws IgniteCheckedException If failed. + */ + public boolean isTombstone(@Nullable CacheDataRow row) throws IgniteCheckedException; + /** * @param ldr Class loader. * @return Number of undeployed entries. @@ -441,10 +458,20 @@ public GridIterator cachePartitionIterator(int cacheId, final int /** * @param part Partition number. + * @param withTombstones {@code True} if should return tombstone entries. * @return Iterator for given partition. * @throws IgniteCheckedException If failed. */ - public GridIterator partitionIterator(final int part) throws IgniteCheckedException; + public GridIterator partitionIterator(final int part, boolean withTombstones) throws IgniteCheckedException; + + /** + * @param part Partition number. + * @return Iterator for given partition that skips tombstones. + * @throws IgniteCheckedException If failed. + */ + public default GridIterator partitionIterator(final int part) throws IgniteCheckedException { + return partitionIterator(part, false); + } /** * @param part Partition number. @@ -732,7 +759,7 @@ public int cleanup(GridCacheContext cctx, @Nullable List * * @param cctx Cache context. * @param row Row. - * @throws IgniteCheckedException + * @throws IgniteCheckedException If failed. */ public void updateTxState(GridCacheContext cctx, CacheSearchRow row) throws IgniteCheckedException; @@ -905,7 +932,7 @@ MvccUpdateResult mvccLock( * @param ver Version. * @param expireTime Expire time. * @param mvccVer Mvcc version. - * @throws IgniteCheckedException + * @throws IgniteCheckedException If failed. 
*/ void mvccApplyUpdate(GridCacheContext cctx, KeyCacheObject key, @@ -923,6 +950,20 @@ void mvccApplyUpdate(GridCacheContext cctx, */ public void remove(GridCacheContext cctx, KeyCacheObject key, int partId) throws IgniteCheckedException; + /** + * @param cctx Cache context. + * @param key Key. + * @param ver Version. + * @param part Partition. + * @throws IgniteCheckedException If failed. + */ + public void removeWithTombstone( + GridCacheContext cctx, + KeyCacheObject key, + GridCacheVersion ver, + GridDhtLocalPartition part + ) throws IgniteCheckedException; + /** * @param cctx Cache context. * @param key Key. @@ -940,7 +981,7 @@ void mvccApplyUpdate(GridCacheContext cctx, * @return Iterator over all versions. * @throws IgniteCheckedException If failed. */ - GridCursor mvccAllVersionsCursor(GridCacheContext cctx, KeyCacheObject key, Object x) + GridCursor mvccAllVersionsCursor(GridCacheContext cctx, KeyCacheObject key, CacheDataRowAdapter.RowData x) throws IgniteCheckedException; /** @@ -964,17 +1005,18 @@ List> mvccFindAllVersions(GridCacheContext cc throws IgniteCheckedException; /** + * @param withTombstones {@code True} if should return tombstone entries. * @return Data cursor. * @throws IgniteCheckedException If failed. */ - public GridCursor cursor() throws IgniteCheckedException; + public GridCursor cursor(boolean withTombstones) throws IgniteCheckedException; /** * @param x Implementation specific argument, {@code null} always means that we need to return full detached data row. * @return Data cursor. * @throws IgniteCheckedException If failed. */ - public GridCursor cursor(Object x) throws IgniteCheckedException; + public GridCursor cursor(CacheDataRowAdapter.RowData x) throws IgniteCheckedException; /** * @param mvccSnapshot MVCC snapshot. @@ -985,10 +1027,11 @@ List> mvccFindAllVersions(GridCacheContext cc /** * @param cacheId Cache ID. + * @param withTombstones {@code True} if should return tombstone entries. * @return Data cursor. 
* @throws IgniteCheckedException If failed. */ - public GridCursor cursor(int cacheId) throws IgniteCheckedException; + public GridCursor cursor(int cacheId, boolean withTombstones) throws IgniteCheckedException; /** * @param cacheId Cache ID. @@ -1018,7 +1061,7 @@ public GridCursor cursor(int cacheId, KeyCacheObject low * @throws IgniteCheckedException If failed. */ public GridCursor cursor(int cacheId, KeyCacheObject lower, - KeyCacheObject upper, Object x) throws IgniteCheckedException; + KeyCacheObject upper, CacheDataRowAdapter.RowData x) throws IgniteCheckedException; /** * @param cacheId Cache ID. @@ -1026,11 +1069,16 @@ public GridCursor cursor(int cacheId, KeyCacheObject low * @param upper Upper bound. * @param x Implementation specific argument, {@code null} always means that we need to return full detached data row. * @param snapshot Mvcc snapshot. + * @param withTombstones {@code True} if should return tombstone entries. * @return Data cursor. * @throws IgniteCheckedException If failed. */ - public GridCursor cursor(int cacheId, KeyCacheObject lower, - KeyCacheObject upper, Object x, MvccSnapshot snapshot) throws IgniteCheckedException; + public GridCursor cursor(int cacheId, + KeyCacheObject lower, + KeyCacheObject upper, + CacheDataRowAdapter.RowData x, + MvccSnapshot snapshot, + boolean withTombstones) throws IgniteCheckedException; /** * Destroys the tree associated with the store. @@ -1094,5 +1142,10 @@ public GridCursor cursor(int cacheId, KeyCacheObject low * Partition storage. */ public PartitionMetaStorage partStorage(); + + /** + * @return Number of tombstone entries. 
+ */ + public long tombstonesCount(); } } diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java index 0df7728f5c5db..85b3654b4f08b 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java @@ -142,6 +142,7 @@ import static org.apache.ignite.internal.processors.cache.mvcc.MvccUtils.unexpectedStateException; import static org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.EMPTY_CURSOR; import static org.apache.ignite.internal.processors.cache.persistence.tree.io.DataPageIO.MVCC_INFO_SIZE; +import static org.apache.ignite.internal.util.IgniteTree.OperationType.IN_PLACE; import static org.apache.ignite.internal.util.IgniteTree.OperationType.NOOP; import static org.apache.ignite.internal.util.IgniteTree.OperationType.PUT; @@ -436,8 +437,8 @@ private Iterator cacheData(boolean primary, boolean backup, Affi GridCacheContext cctx, KeyCacheObject key, GridDhtLocalPartition part, - OffheapInvokeClosure c) - throws IgniteCheckedException { + OffheapInvokeClosure c + ) throws IgniteCheckedException { dataStore(part).invoke(cctx, key, c); } @@ -615,6 +616,28 @@ private Iterator cacheData(boolean primary, boolean backup, Affi dataStore(part).remove(cctx, key, partId); } + /** {@inheritDoc} */ + @Override public void removeWithTombstone( + GridCacheContext cctx, + KeyCacheObject key, + GridCacheVersion ver, + GridDhtLocalPartition part + ) throws IgniteCheckedException { + assert part != null; + assert !cctx.isNear(); + assert !cctx.isLocal(); + + dataStore(part).removeWithTombstone(cctx, key, ver, part); + } + + /** {@inheritDoc} */ + @Override public boolean isTombstone(CacheDataRow row) throws 
IgniteCheckedException { + if (!grp.supportsTombstone()) + return false; + + return grp.shared().database().isTombstone(row); + } + /** {@inheritDoc} */ @Override @Nullable public CacheDataRow read(GridCacheMapEntry entry) throws IgniteCheckedException { @@ -662,7 +685,7 @@ private Iterator cacheData(boolean primary, boolean backup, Affi /** {@inheritDoc} */ @Override public GridCursor mvccAllVersionsCursor(GridCacheContext cctx, - KeyCacheObject key, Object x) throws IgniteCheckedException { + KeyCacheObject key, CacheDataRowAdapter.RowData x) throws IgniteCheckedException { CacheDataStore dataStore = dataStore(cctx, key); return dataStore != null ? dataStore.mvccAllVersionsCursor(cctx, key, x) : EMPTY_CURSOR; @@ -682,18 +705,6 @@ private Iterator cacheData(boolean primary, boolean backup, Affi return part != null ? dataStore(part) : null; } - /** {@inheritDoc} */ - @Override public boolean containsKey(GridCacheMapEntry entry) { - try { - return read(entry) != null; - } - catch (IgniteCheckedException e) { - U.error(log, "Failed to read value", e); - - return false; - } - } - /** {@inheritDoc} */ @Override public void onPartitionCounterUpdated(int part, long cntr) { // No-op. @@ -718,8 +729,8 @@ private Iterator cacheData(boolean primary, boolean backup, Affi GridCacheVersion obsoleteVer = null; try (GridCloseableIterator it = grp.isLocal() ? 
- iterator(cctx.cacheId(), cacheDataStores().iterator(), null, null) : - evictionSafeIterator(cctx.cacheId(), cacheDataStores().iterator())) { + iterator(cctx.cacheId(), cacheDataStores().iterator(), null, null, true) : + evictionSafeIterator(cctx.cacheId(), cacheDataStores().iterator(), true)) { while (it.hasNext()) { cctx.shared().database().checkpointReadLock(); @@ -862,7 +873,7 @@ private Iterator cacheData(boolean primary, boolean backup, Affi @Nullable MvccSnapshot mvccSnapshot, Boolean dataPageScanEnabled ) { - return iterator(cacheId, cacheData(primary, backups, topVer), mvccSnapshot, dataPageScanEnabled); + return iterator(cacheId, cacheData(primary, backups, topVer), mvccSnapshot, dataPageScanEnabled, false); } /** {@inheritDoc} */ @@ -873,17 +884,17 @@ private Iterator cacheData(boolean primary, boolean backup, Affi if (data == null) return new GridEmptyCloseableIterator<>(); - return iterator(cacheId, singletonIterator(data), mvccSnapshot, dataPageScanEnabled); + return iterator(cacheId, singletonIterator(data), mvccSnapshot, dataPageScanEnabled, false); } /** {@inheritDoc} */ - @Override public GridIterator partitionIterator(int part) { + @Override public GridIterator partitionIterator(int part, boolean withTombstones) { CacheDataStore data = partitionData(part); if (data == null) return new GridEmptyCloseableIterator<>(); - return iterator(CU.UNDEFINED_CACHE_ID, singletonIterator(data), null, null); + return iterator(CU.UNDEFINED_CACHE_ID, singletonIterator(data), null, null, withTombstones); } /** @@ -892,12 +903,14 @@ private Iterator cacheData(boolean primary, boolean backup, Affi * @param dataIt Data store iterator. * @param mvccSnapshot Mvcc snapshot. * @param dataPageScanEnabled Flag to enable data page scan. + * @param withTombstones {@code True} if should return tombstone entries. 
* @return Rows iterator */ private GridCloseableIterator iterator(final int cacheId, final Iterator dataIt, final MvccSnapshot mvccSnapshot, - Boolean dataPageScanEnabled + Boolean dataPageScanEnabled, + boolean withTombstones ) { return new GridCloseableIteratorAdapter() { /** */ @@ -934,7 +947,7 @@ private GridCloseableIterator iterator(final int cacheId, try { if (mvccSnapshot == null) - cur = cacheId == CU.UNDEFINED_CACHE_ID ? ds.cursor() : ds.cursor(cacheId); + cur = cacheId == CU.UNDEFINED_CACHE_ID ? ds.cursor(withTombstones) : ds.cursor(cacheId, withTombstones); else { cur = cacheId == CU.UNDEFINED_CACHE_ID ? ds.cursor(mvccSnapshot) : ds.cursor(cacheId, mvccSnapshot); @@ -966,9 +979,13 @@ private GridCloseableIterator iterator(final int cacheId, /** * @param cacheId Cache ID. * @param dataIt Data store iterator. + * @param withTombstones {@code True} if should return tombstone entries. * @return Rows iterator */ - private GridCloseableIterator evictionSafeIterator(final int cacheId, final Iterator dataIt) { + private GridCloseableIterator evictionSafeIterator( + final int cacheId, + final Iterator dataIt, + boolean withTombstones) { return new GridCloseableIteratorAdapter() { /** */ private GridCursor cur; @@ -999,7 +1016,7 @@ private GridCloseableIterator evictionSafeIterator(final int cache if (!reservePartition(ds.partId())) continue; - cur = cacheId == CU.UNDEFINED_CACHE_ID ? ds.cursor() : ds.cursor(cacheId); + cur = cacheId == CU.UNDEFINED_CACHE_ID ? ds.cursor(withTombstones) : ds.cursor(cacheId, withTombstones); } else break; @@ -1458,6 +1475,9 @@ protected class CacheDataStoreImpl implements CacheDataStore { /** */ private final PageHandler mvccApplyChanges = new MvccApplyChangesHandler(); + /** Tombstones counter. */ + private final AtomicLong tombstonesCnt = new AtomicLong(); + /** * @param partId Partition number. * @param rowStore Row store. 
@@ -1709,13 +1729,23 @@ private void invoke0(GridCacheContext cctx, CacheSearchRow row, OffheapInvokeClo case REMOVE: { CacheDataRow oldRow = c.oldRow(); - finishRemove(cctx, row.key(), oldRow); + finishRemove(cctx, row.key(), oldRow, null); break; } - case NOOP: case IN_PLACE: + assert !isTombstone(c.newRow()); + + if (isTombstone(c.oldRow())) { + tombstoneRemoved(); + + incrementSize(cctx.cacheId()); + } + + break; + + case NOOP: break; default: @@ -1733,6 +1763,10 @@ private void invoke0(GridCacheContext cctx, CacheSearchRow row, OffheapInvokeClo @Nullable CacheDataRow oldRow) throws IgniteCheckedException { int cacheId = grp.storeCacheIdInDataPage() ? cctx.cacheId() : CU.UNDEFINED_CACHE_ID; + // Set real stored cacheId to properly calculate row size. + if (oldRow != null) + oldRow.cacheId(cacheId); + DataRow dataRow = makeDataRow(key, val, ver, expireTime, cacheId); if (canUpdateOldRow(cctx, oldRow, dataRow) && rowStore.updateRow(oldRow.link(), dataRow, grp.statisticsHolderData())) @@ -1748,8 +1782,13 @@ private void invoke0(GridCacheContext cctx, CacheSearchRow row, OffheapInvokeClo assert dataRow.link() != 0 : dataRow; - if (grp.sharedGroup() && dataRow.cacheId() == CU.UNDEFINED_CACHE_ID) - dataRow.cacheId(cctx.cacheId()); + if (grp.sharedGroup()) { + if (dataRow.cacheId() == CU.UNDEFINED_CACHE_ID) + dataRow.cacheId(cctx.cacheId()); + + if (oldRow != null && oldRow.cacheId() == CU.UNDEFINED_CACHE_ID) + oldRow.cacheId(cctx.cacheId()); + } return dataRow; } @@ -2607,7 +2646,12 @@ private int cleanup0(GridCacheContext cctx, @Nullable List mvccAllVersionsCursor(GridCacheContext cctx, KeyCacheObject key, Object x) + @Override public GridCursor mvccAllVersionsCursor(GridCacheContext cctx, KeyCacheObject key, CacheDataRowAdapter.RowData x) throws IgniteCheckedException { int cacheId = cctx.cacheId(); @@ -2835,19 +2996,91 @@ private void afterRowFound(@Nullable CacheDataRow row, KeyCacheObject key) throw } /** {@inheritDoc} */ - @Override public GridCursor cursor() 
throws IgniteCheckedException { - return dataTree.find(null, null); + @Override public GridCursor cursor(boolean withTombstones) throws IgniteCheckedException { + GridCursor cur = dataTree.find(null, null); + + return withTombstones ? cur : cursorSkipTombstone(cur); + } + + /** + * @param cur Cursor. + * @return Cursor skipping non-tombstone entries. + */ + private GridCursor cursorSkipEmpty(final GridCursor cur) { + if (!grp.supportsTombstone()) + return cur; + + return new GridCursor() { + /** */ + CacheDataRow next; + + /** {@inheritDoc} */ + @Override public boolean next() throws IgniteCheckedException { + while (cur.next()) { + CacheDataRow next = cur.get(); + + // If request cursor with RowData.TOMBSTONES, then for non-tombtones all fields are null. + if (next.version() != null) { + this.next = next; + + return true; + } + } + + return false; + } + + /** {@inheritDoc} */ + @Override public CacheDataRow get() { + return next; + } + }; + } + + /** + * @param cur Cursor. + * @return Cursor skipping tombstone entries. + */ + private GridCursor cursorSkipTombstone(final GridCursor cur) { + if (!grp.supportsTombstone()) + return cur; + + return new GridCursor() { + /** */ + CacheDataRow next; + + /** {@inheritDoc} */ + @Override public boolean next() throws IgniteCheckedException { + while (cur.next()) { + CacheDataRow next = cur.get(); + + if (!isTombstone(next)) { + this.next = next; + + return true; + } + } + + return false; + } + + /** {@inheritDoc} */ + @Override public CacheDataRow get() { + return next; + } + }; } /** {@inheritDoc} */ - @Override public GridCursor cursor(Object x) throws IgniteCheckedException { - return dataTree.find(null, null, x); + @Override public GridCursor cursor(CacheDataRowAdapter.RowData x) throws IgniteCheckedException { + GridCursor cur = dataTree.find(null, null, x); + + return x == CacheDataRowAdapter.RowData.TOMBSTONES ? 
cursorSkipEmpty(cur) : cursorSkipTombstone(cur); } /** {@inheritDoc} */ @Override public GridCursor cursor(MvccSnapshot mvccSnapshot) throws IgniteCheckedException { - GridCursor cursor; if (mvccSnapshot != null) { assert grp.mvccEnabled(); @@ -2856,20 +3089,20 @@ private void afterRowFound(@Nullable CacheDataRow row, KeyCacheObject key) throw new MvccFirstVisibleRowTreeClosure(grp.singleCacheContext(), mvccSnapshot), null); } else - cursor = dataTree.find(null, null); + cursor = cursorSkipTombstone(dataTree.find(null, null)); return cursor; } /** {@inheritDoc} */ - @Override public GridCursor cursor(int cacheId) throws IgniteCheckedException { - return cursor(cacheId, null, null); + @Override public GridCursor cursor(int cacheId, boolean withTombstones) throws IgniteCheckedException { + return cursor(cacheId, null, null, null, null, withTombstones); } /** {@inheritDoc} */ @Override public GridCursor cursor(int cacheId, MvccSnapshot mvccSnapshot) throws IgniteCheckedException { - return cursor(cacheId, null, null, null, mvccSnapshot); + return cursor(cacheId, null, null, null, mvccSnapshot, false); } /** {@inheritDoc} */ @@ -2880,13 +3113,17 @@ private void afterRowFound(@Nullable CacheDataRow row, KeyCacheObject key) throw /** {@inheritDoc} */ @Override public GridCursor cursor(int cacheId, KeyCacheObject lower, - KeyCacheObject upper, Object x) throws IgniteCheckedException { - return cursor(cacheId, lower, upper, null, null); + KeyCacheObject upper, CacheDataRowAdapter.RowData x) throws IgniteCheckedException { + return cursor(cacheId, lower, upper, null, null, false); } /** {@inheritDoc} */ - @Override public GridCursor cursor(int cacheId, KeyCacheObject lower, - KeyCacheObject upper, Object x, MvccSnapshot snapshot) throws IgniteCheckedException { + @Override public GridCursor cursor(int cacheId, + KeyCacheObject lower, + KeyCacheObject upper, + CacheDataRowAdapter.RowData x, + MvccSnapshot snapshot, + boolean withTombstones) throws IgniteCheckedException { 
SearchRow lowerRow; SearchRow upperRow; @@ -2910,9 +3147,13 @@ private void afterRowFound(@Nullable CacheDataRow row, KeyCacheObject key) throw cursor = dataTree.find(lowerRow, upperRow, new MvccFirstVisibleRowTreeClosure(cctx, snapshot), x); } - else + else { cursor = dataTree.find(lowerRow, upperRow, x); + if (!withTombstones) + cursor = cursorSkipTombstone(cursor); + } + return cursor; } @@ -2952,7 +3193,7 @@ private void afterRowFound(@Nullable CacheDataRow row, KeyCacheObject key) throw Exception ex = null; GridCursor cur = - cursor(cacheId, null, null, CacheDataRowAdapter.RowData.KEY_ONLY); + cursor(cacheId, null, null, CacheDataRowAdapter.RowData.KEY_ONLY, null, true); while (cur.next()) { CacheDataRow row = cur.get(); @@ -2996,17 +3237,24 @@ private void afterRowFound(@Nullable CacheDataRow row, KeyCacheObject key) throw * @param size Size to init. * @param updCntr Update counter. * @param cacheSizes Cache sizes if store belongs to group containing multiple caches. - * @param cntrUpdData Counter updates. + * @param updCntrGapsData Update counters gaps raw data. + * @param tombstonesCnt Tombstones count. 
*/ - public void restoreState(long size, long updCntr, @Nullable Map cacheSizes, byte[] cntrUpdData) { - pCntr.init(updCntr, cntrUpdData); + public void restoreState( + long size, + long updCntr, + Map cacheSizes, + byte[] updCntrGapsData, + long tombstonesCnt + ) { + pCntr.init(updCntr, updCntrGapsData); storageSize.set(size); - if (cacheSizes != null) { - for (Map.Entry e : cacheSizes.entrySet()) - this.cacheSizes.put(e.getKey(), new AtomicLong(e.getValue())); - } + for (Map.Entry e : cacheSizes.entrySet()) + this.cacheSizes.put(e.getKey(), new AtomicLong(e.getValue())); + + this.tombstonesCnt.set(tombstonesCnt); } /** {@inheritDoc} */ @@ -3029,6 +3277,25 @@ public void restoreState(long size, long updCntr, @Nullable Map c return null; } + /** {@inheritDoc} */ + @Override public long tombstonesCount() { + return tombstonesCnt.get(); + } + + /** + * Called when tombstone has removed from partition. + */ + private void tombstoneRemoved() { + tombstonesCnt.decrementAndGet(); + } + + /** + * Called when tombstone has created in partition. + */ + private void tombstoneCreated() { + tombstonesCnt.incrementAndGet(); + } + /** * @param cctx Cache context. * @param key Key. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteCacheObject.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteCacheObject.java index dedb3bd98ef24..3802c2b6c72d1 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteCacheObject.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteCacheObject.java @@ -43,6 +43,8 @@ public IncompleteCacheObject(final ByteBuffer buf) { if (buf.remaining() >= HEAD_LEN) { data = new byte[buf.getInt()]; type = buf.get(); + + headerReady(); } // We cannot fully read head to initialize data buffer. // Start partial read of header. 
@@ -68,6 +70,8 @@ public IncompleteCacheObject(final ByteBuffer buf) { data = new byte[headBuf.getInt()]; type = headBuf.get(); + + headerReady(); } } @@ -75,6 +79,21 @@ public IncompleteCacheObject(final ByteBuffer buf) { super.readData(buf); } + /** + * Invoke when object header is ready. + */ + private void headerReady() { + if (type == CacheObject.TOMBSTONE) + object(TombstoneCacheObject.INSTANCE); + } + + /** + * @return Size of already read data. + */ + public int dataOffset() { + return off; + } + /** * @return Data type. */ diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteObject.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteObject.java index 7c24c12bcf5de..27c9def992545 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteObject.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IncompleteObject.java @@ -33,7 +33,7 @@ public class IncompleteObject { private T obj; /** */ - private int off; + protected int off; /** * @param data Data bytes. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/PartitionUpdateCounter.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/PartitionUpdateCounter.java index 112a11051fd86..1d41d7f05e297 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/PartitionUpdateCounter.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/PartitionUpdateCounter.java @@ -35,9 +35,9 @@ public interface PartitionUpdateCounter extends Iterable { * Restores update counter state. * * @param initUpdCntr LWM. - * @param cntrUpdData Counter updates raw data. + * @param updCntrGapsData Updates counters gaps raw data. 
*/ - public void init(long initUpdCntr, @Nullable byte[] cntrUpdData); + public void init(long initUpdCntr, @Nullable byte[] updCntrGapsData); /** * @deprecated TODO LWM should be used as initial counter https://ggsystems.atlassian.net/browse/GG-17396 diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/TombstoneCacheObject.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/TombstoneCacheObject.java new file mode 100644 index 0000000000000..d87b0245a6412 --- /dev/null +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/TombstoneCacheObject.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.processors.cache; + +import java.io.IOException; +import java.io.ObjectInput; +import org.apache.ignite.IgniteCheckedException; +import org.jetbrains.annotations.Nullable; + +/** + * Special value object indicating that value is removed. + */ +public class TombstoneCacheObject extends CacheObjectAdapter { + /** */ + private static final long serialVersionUID = 2106775575127797257L; + + /** Empty. */ + private static final byte[] EMPTY = new byte[] { }; + + /** Instance. 
*/ + public static final TombstoneCacheObject INSTANCE = new TombstoneCacheObject(); + + /** + * Default constructor. + */ + public TombstoneCacheObject() { + valBytes = EMPTY; + } + + /** {@inheritDoc} */ + @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + valBytes = EMPTY; + } + + /** {@inheritDoc} */ + @Override public @Nullable T value(CacheObjectValueContext ctx, boolean cpy) { + return null; + } + + /** {@inheritDoc} */ + @Override public byte[] valueBytes(CacheObjectValueContext ctx) throws IgniteCheckedException { + return valBytes; + } + + /** {@inheritDoc} */ + @Override public byte cacheObjectType() { + return CacheObject.TOMBSTONE; + } + + /** {@inheritDoc} */ + @Override public boolean isPlatformType() { + return true; + } + + /** {@inheritDoc} */ + @Override public CacheObject prepareForCache(CacheObjectContext ctx) { + return this; + } + + /** {@inheritDoc} */ + @Override public void finishUnmarshal(CacheObjectValueContext ctx, ClassLoader ldr) throws IgniteCheckedException { + + } + + /** {@inheritDoc} */ + @Override public void prepareMarshal(CacheObjectValueContext ctx) throws IgniteCheckedException { + + } + + /** {@inheritDoc} */ + @Override public short directType() { + return 176; + } + + /** {@inheritDoc} */ + @Override public void onAckReceived() { + + } +} diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/binary/CacheObjectBinaryProcessorImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/binary/CacheObjectBinaryProcessorImpl.java index ac2d237562535..8c8a3efe5ee2e 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/binary/CacheObjectBinaryProcessorImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/binary/CacheObjectBinaryProcessorImpl.java @@ -17,7 +17,6 @@ package org.apache.ignite.internal.processors.cache.binary; -import javax.cache.CacheException; import 
java.io.File; import java.io.Serializable; import java.math.BigDecimal; @@ -32,6 +31,7 @@ import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import javax.cache.CacheException; import org.apache.ignite.IgniteBinary; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.IgniteClientDisconnectedException; @@ -83,6 +83,7 @@ import org.apache.ignite.internal.processors.cache.IncompleteCacheObject; import org.apache.ignite.internal.processors.cache.KeyCacheObject; import org.apache.ignite.internal.processors.cache.KeyCacheObjectImpl; +import org.apache.ignite.internal.processors.cache.TombstoneCacheObject; import org.apache.ignite.internal.processors.cache.transactions.IgniteInternalTx; import org.apache.ignite.internal.processors.cacheobject.IgniteCacheObjectProcessor; import org.apache.ignite.internal.processors.cacheobject.UserCacheObjectByteArrayImpl; @@ -1145,6 +1146,9 @@ private int partition(CacheObjectContext ctx, @Nullable GridCacheContext cctx, O case CacheObject.TYPE_REGULAR: return new CacheObjectImpl(null, bytes); + + case CacheObject.TOMBSTONE: + return TombstoneCacheObject.INSTANCE; } throw new IllegalArgumentException("Invalid object type: " + type); diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtLocalPartition.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtLocalPartition.java index 65447c855f554..cd61492d15d7b 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtLocalPartition.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtLocalPartition.java @@ -43,6 +43,7 @@ import org.apache.ignite.internal.processors.cache.GridCacheConcurrentMapImpl; import org.apache.ignite.internal.processors.cache.GridCacheContext; import 
org.apache.ignite.internal.processors.cache.GridCacheEntryEx; +import org.apache.ignite.internal.processors.cache.GridCacheEntryRemovedException; import org.apache.ignite.internal.processors.cache.GridCacheMapEntry; import org.apache.ignite.internal.processors.cache.GridCacheMapEntryFactory; import org.apache.ignite.internal.processors.cache.GridCacheSharedContext; @@ -53,6 +54,7 @@ import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader; import org.apache.ignite.internal.processors.cache.extras.GridCacheObsoleteEntryExtras; import org.apache.ignite.internal.processors.cache.persistence.CacheDataRow; +import org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter; import org.apache.ignite.internal.processors.cache.transactions.IgniteInternalTx; import org.apache.ignite.internal.processors.cache.transactions.TxCounters; import org.apache.ignite.internal.processors.cache.version.GridCacheVersion; @@ -61,7 +63,9 @@ import org.apache.ignite.internal.util.collection.IntMap; import org.apache.ignite.internal.util.collection.IntRWHashMap; import org.apache.ignite.internal.util.future.GridFutureAdapter; +import org.apache.ignite.internal.util.lang.GridCursor; import org.apache.ignite.internal.util.lang.GridIterator; +import org.apache.ignite.internal.util.lang.GridIteratorAdapter; import org.apache.ignite.internal.util.tostring.GridToStringExclude; import org.apache.ignite.internal.util.typedef.internal.LT; import org.apache.ignite.internal.util.typedef.internal.S; @@ -174,10 +178,10 @@ public class GridDhtLocalPartition extends GridCacheConcurrentMapImpl implements * @param recovery Flag indicates that partition is created during recovery phase. 
*/ public GridDhtLocalPartition( - GridCacheSharedContext ctx, - CacheGroupContext grp, - int id, - boolean recovery + GridCacheSharedContext ctx, + CacheGroupContext grp, + int id, + boolean recovery ) { super(ENTRY_FACTORY); @@ -602,8 +606,12 @@ public boolean own() { assert partState == MOVING || partState == LOST; - if (casState(state, OWNING)) + if (casState(state, OWNING)) { + if (hasTombstones()) + clearTombstonesAsync(); + return true; + } } } @@ -756,6 +764,21 @@ public void clearAsync() { clearAsync0(false); } + /** + * @return {@code True} if partition has tombstone entries. + */ + boolean hasTombstones() { + return grp.supportsTombstone() && dataStore().tombstonesCount() > 0; + } + + /** + * Adds async task that will clear tombstone entries from partition. + * @see #clearTombstones(EvictionContext). + */ + void clearTombstonesAsync() { + grp.shared().evict().clearTombstonesAsync(grp, this); + } + /** * Continues delayed clearing of partition if possible. * Clearing may be delayed because of existing reservations. @@ -921,7 +944,7 @@ public boolean isClearing() { * @return {@code false} if clearing is not started due to existing reservations. * @throws NodeStoppingException If node is stopping. 
*/ - public boolean tryClear(EvictionContext evictionCtx) throws NodeStoppingException { + public boolean tryClear(EvictionContext evictionCtx) throws NodeStoppingException, IgniteCheckedException { if (clearFuture.isDone()) return true; @@ -932,8 +955,73 @@ public boolean tryClear(EvictionContext evictionCtx) throws NodeStoppingExceptio if (addEvicting()) { try { + GridCacheVersion clearVer = ctx.versions().next(); + + GridCacheObsoleteEntryExtras extras = new GridCacheObsoleteEntryExtras(clearVer); + + boolean rec = grp.eventRecordable(EVT_CACHE_REBALANCE_OBJECT_UNLOADED); + + if (grp.sharedGroup()) + cacheMaps.forEach((key, hld) -> clearOnheapEntries(hld.map, extras, rec)); + else + clearOnheapEntries(singleCacheEntryMap.map, extras, rec); + // Attempt to evict partition entries from cache. - long clearedEntities = clearAll(evictionCtx); + long clearedEntities = doClear( + evictionCtx, + 1000, + grp.offheap().partitionIterator(id, true), + (hld, row) -> { + // Do not clear fresh rows in case of partition reloading. + // This is required because normal updates are possible to moving partition which is currently cleared. 
+ if (row.version().compareTo(clearVer) >= 0 && state() == MOVING) + return false; + + GridCacheMapEntry cached = putEntryIfObsoleteOrAbsent( + hld, + hld.cctx, + grp.affinity().lastVersion(), + row.key(), + true, + false); + + if (cached instanceof GridDhtCacheEntry && ((GridDhtCacheEntry)cached).clearInternal(clearVer, extras)) { + removeEntry(cached); + + if (rec && !hld.cctx.config().isEventsDisabled()) { + hld.cctx.events().addEvent(cached.partition(), + cached.key(), + ctx.localNodeId(), + null, + null, + null, + EVT_CACHE_REBALANCE_OBJECT_UNLOADED, + null, + false, + cached.rawGet(), + cached.hasValue(), + null, + null, + null, + false); + } + + return true; + } + + return false; + } + ); + + if (forceTestCheckpointOnEviction) { + if (partWhereTestCheckpointEnforced == null && clearedEntities >= fullSize()) { + ctx.database().forceCheckpoint("test").finishFuture().get(); + + log.warning("Forced checkpoint by test reasons for partition: " + this); + + partWhereTestCheckpointEnforced = id; + } + } if (log.isDebugEnabled()) log.debug("Partition has been cleared [grp=" + grp.cacheOrGroupName() @@ -1120,112 +1208,119 @@ public long fullSize() { } /** - * Removes all entries and rows from this partition. + * Iterates over partition entries and removes tombstone entries. * - * @return Number of rows cleared from page memory. - * @throws NodeStoppingException If node stopping. + * @param evictionCtx Eviction context. 
*/ - private long clearAll(EvictionContext evictionCtx) throws NodeStoppingException { - GridCacheVersion clearVer = ctx.versions().next(); + void clearTombstones(EvictionContext evictionCtx) throws IgniteCheckedException { + if (evictionCtx.shouldStop()) + return; - GridCacheObsoleteEntryExtras extras = new GridCacheObsoleteEntryExtras(clearVer); + GridIterator iter; - boolean rec = grp.eventRecordable(EVT_CACHE_REBALANCE_OBJECT_UNLOADED); + try { + GridCursor cur = store.cursor(CacheDataRowAdapter.RowData.TOMBSTONES); - if (grp.sharedGroup()) - cacheMaps.forEach((key, hld) -> clear(hld.map, extras, rec)); - else - clear(singleCacheEntryMap.map, extras, rec); + iter = new GridIteratorAdapter() { + @Override public boolean hasNextX() throws IgniteCheckedException { + return cur.next(); + } - long cleared = 0; + @Override public CacheDataRow nextX() throws IgniteCheckedException { + return cur.get(); + } - final int stopCheckingFreq = 1000; + @Override public void removeX() throws IgniteCheckedException { + throw new UnsupportedOperationException(); + } + }; + } + catch (IgniteCheckedException e) { + throw new IgniteCheckedException("Failed to get iterator for partition: " + id, e); + } - CacheMapHolder hld = grp.sharedGroup() ? null : singleCacheEntryMap; + doClear( + evictionCtx, + 10, + iter, + (hld, row) -> { + while (true) { + GridCacheMapEntry cached = null; - try { - GridIterator it0 = grp.offheap().partitionIterator(id); + try { + cached = putEntryIfObsoleteOrAbsent( + hld, + hld.cctx, + grp.affinity().lastVersion(), + row.key(), + true, + false); - while (it0.hasNext()) { - ctx.database().checkpointReadLock(); + cached.removeTombstone(row.version()); - try { - CacheDataRow row = it0.next(); + return true; + } + catch (GridCacheEntryRemovedException e) { + cached = null; + } + finally { + if (cached != null) + cached.touch(); + } + } + } + ); + } - // Do not clear fresh rows in case of partition reloading. 
- // This is required because normal updates are possible to moving partition which is currently cleared. - if (row.version().compareTo(clearVer) >= 0 && state() == MOVING) - continue; + /** + * Runs abstract clear operation over partition data rows. + * + * @param evictionCtx Eviction context. + * @param stopCheckingFreq Frequency to check stopping eviction/clearing. + * @param rowIter Rows iterator. + * @param clearOp Clear operation. + * @return Number of cleared rows. + * @throws IgniteCheckedException If failed. + */ + private long doClear( + EvictionContext evictionCtx, + int stopCheckingFreq, + GridIterator rowIter, + ClearRowOperation clearOp + ) throws IgniteCheckedException { + long cleared = 0; - if (grp.sharedGroup() && (hld == null || hld.cctx.cacheId() != row.cacheId())) - hld = cacheMapHolder(ctx.cacheContext(row.cacheId())); + CacheMapHolder hld = grp.sharedGroup() ? null : singleCacheEntryMap; - assert hld != null; + while (rowIter.hasNext()) { + ctx.database().checkpointReadLock(); - GridCacheMapEntry cached = putEntryIfObsoleteOrAbsent( - hld, - hld.cctx, - grp.affinity().lastVersion(), - row.key(), - true, - false); + try { + CacheDataRow row = rowIter.next(); - if (cached instanceof GridDhtCacheEntry && ((GridDhtCacheEntry)cached).clearInternal(clearVer, extras)) { - removeEntry(cached); + assert row.key() != null : row; + assert row.version() != null : row; - if (rec && !hld.cctx.config().isEventsDisabled()) { - hld.cctx.events().addEvent(cached.partition(), - cached.key(), - ctx.localNodeId(), - null, - null, - null, - EVT_CACHE_REBALANCE_OBJECT_UNLOADED, - null, - false, - cached.rawGet(), - cached.hasValue(), - null, - null, - null, - false); - } + if (grp.sharedGroup() && (hld == null || hld.cctx.cacheId() != row.cacheId())) + hld = cacheMapHolder(ctx.cacheContext(row.cacheId())); - cleared++; - } + assert hld != null; - // For each 'stopCheckingFreq' cleared entities check clearing process to stop. 
- if (cleared % stopCheckingFreq == 0 && evictionCtx.shouldStop()) - return cleared; - } - catch (GridDhtInvalidPartitionException e) { - assert isEmpty() && state() == EVICTED : "Invalid error [e=" + e + ", part=" + this + ']'; + if (clearOp.apply(hld, row)) + cleared++; - break; // Partition is already concurrently cleared and evicted. - } - finally { - ctx.database().checkpointReadUnlock(); - } + // For each 'stopCheckingFreq' cleared entities check clearing process to stop. + if (cleared % stopCheckingFreq == 0 && evictionCtx.shouldStop()) + return cleared; } + catch (GridDhtInvalidPartitionException e) { + assert isEmpty() && state() == EVICTED : "Invalid error [e=" + e + ", part=" + this + ']'; - if (forceTestCheckpointOnEviction) { - if (partWhereTestCheckpointEnforced == null && cleared >= fullSize()) { - ctx.database().forceCheckpoint("test").finishFuture().get(); - - log.warning("Forced checkpoint by test reasons for partition: " + this); - - partWhereTestCheckpointEnforced = id; - } + break; // Partition is already concurrently cleared and evicted. + } + finally { + ctx.database().checkpointReadUnlock(); } - } - catch (NodeStoppingException e) { - if (log.isDebugEnabled()) - log.debug("Failed to get iterator for evicted partition: " + id); - - throw e; - } - catch (IgniteCheckedException e) { - U.error(log, "Failed to get iterator for evicted partition: " + id, e); } return cleared; @@ -1239,9 +1334,11 @@ private long clearAll(EvictionContext evictionCtx) throws NodeStoppingException * @param evt Unload event flag. * @throws NodeStoppingException If current node is stopping. */ - private void clear(ConcurrentMap map, + private void clearOnheapEntries( + ConcurrentMap map, GridCacheObsoleteEntryExtras extras, - boolean evt) throws NodeStoppingException { + boolean evt + ) throws NodeStoppingException { Iterator it = map.values().iterator(); while (it.hasNext()) { @@ -1533,6 +1630,18 @@ long expireTime() { } } + /** + * Abstract operation to clear row. 
+ */ + @FunctionalInterface + private static interface ClearRowOperation { + /** + * @param hld Hld. + * @param row Row. + */ + boolean apply(CacheMapHolder hld, CacheDataRow row) throws IgniteCheckedException; + } + /** * Future is needed to control partition clearing process. * Future can be used both for single clearing or eviction processes. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java index 2078eabd701f9..1f90108f2a0c3 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java @@ -716,9 +716,10 @@ private boolean partitionLocalNode(int p, AffinityTopologyVersion topVer) { updateLocal(p, state, updateSeq, topVer); - // Restart cleaning. - if (state == RENTING) + if (state == RENTING) // Restart cleaning. locPart.clearAsync(); + else if (state == OWNING && locPart.hasTombstones()) + locPart.clearTombstonesAsync(); // Restart tombstones cleaning. 
} } } diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManager.java index 31399bd907894..2d3b813aec9b3 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManager.java @@ -21,6 +21,7 @@ import java.util.Comparator; import java.util.HashSet; import java.util.Map; +import java.util.Objects; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -106,6 +107,18 @@ public void onCacheGroupStopped(CacheGroupContext grp){ } } + /** + * @param grp Group context. + * @param part Partition to clear tombstones. + */ + public void clearTombstonesAsync(CacheGroupContext grp, GridDhtLocalPartition part) { + if (addAsyncTask(grp, part, TaskType.CLEAR_TOMBSTONES)) { + if (log.isDebugEnabled()) + log.debug("Partition has been scheduled for tomstones cleanup [grp=" + grp.cacheOrGroupName() + + ", p=" + part.id() + ", state=" + part.state() + "]"); + } + } + /** * Adds partition to eviction queue and starts eviction process if permit available. * @@ -113,29 +126,56 @@ public void onCacheGroupStopped(CacheGroupContext grp){ * @param part Partition to evict. */ public void evictPartitionAsync(CacheGroupContext grp, GridDhtLocalPartition part) { + if (addAsyncTask(grp, part, TaskType.EVICT)) { + if (log.isDebugEnabled()) + log.debug("Partition has been scheduled for eviction [grp=" + grp.cacheOrGroupName() + + ", p=" + part.id() + ", state=" + part.state() + "]"); + } + } + + /** + * @param grp Group context. + * @param part Partition. + * @param type Task type. + * @return {@code True} if task was added. 
+ */ + private boolean addAsyncTask(CacheGroupContext grp, GridDhtLocalPartition part, TaskType type) { GroupEvictionContext grpEvictionCtx = evictionGroupsMap.computeIfAbsent( grp.groupId(), (k) -> new GroupEvictionContext(grp)); // Check node stop. if (grpEvictionCtx.shouldStop()) - return; + return false; int bucket; + AbstractEvictionTask task; + + switch (type) { + case EVICT: + task = new PartitionEvictionTask(part, grpEvictionCtx); + break; + + case CLEAR_TOMBSTONES: + task = new ClearTombstonesTask(part, grpEvictionCtx); + break; + + default: + throw new UnsupportedOperationException("Unsupported task type: " + type); + } + synchronized (mux) { - if (!grpEvictionCtx.partIds.add(part.id())) - return; + if (!grpEvictionCtx.taskIds.add(task.id)) + return false; - bucket = evictionQueue.offer(new PartitionEvictionTask(part, grpEvictionCtx)); + bucket = evictionQueue.offer(task); } - grpEvictionCtx.totalTasks.incrementAndGet(); + grpEvictionCtx.taskAdded(task); - if (log.isDebugEnabled()) - log.debug("Partition has been scheduled for eviction [grp=" + grp.cacheOrGroupName() - + ", p=" + part.id() + ", state=" + part.state() + "]"); + scheduleNextTask(bucket); - scheduleNextPartitionEviction(bucket); + return true; } /** @@ -143,7 +183,7 @@ public void evictPartitionAsync(CacheGroupContext grp, GridDhtLocalPartition par * * @param bucket Bucket. */ - private void scheduleNextPartitionEviction(int bucket) { + private void scheduleNextTask(int bucket) { // Check node stop. if (sharedEvictionCtx.shouldStop()) return; @@ -158,7 +198,7 @@ private void scheduleNextPartitionEviction(int bucket) { // Get task until we have permits. while (permits >= 0) { // Get task from bucket. - PartitionEvictionTask evictionTask = evictionQueue.poll(bucket); + AbstractEvictionTask evictionTask = evictionQueue.poll(bucket); // If bucket empty try get from another. 
if (evictionTask == null) { @@ -198,12 +238,12 @@ private void scheduleNextPartitionEviction(int bucket) { permits++; } - // Re-schedule new one task form same bucket. - scheduleNextPartitionEviction(bucket); + // Re-schedule new one task for same bucket. + scheduleNextTask(bucket); }); // Submit task to executor. - cctx.kernalContext() + cctx.kernalContext() .closure() .runLocalSafe(evictionTask, EVICT_POOL_PLC); } @@ -219,10 +259,10 @@ private void showProgress() { int size = evictionQueue.size() + 1; // Queue size plus current partition. if (log.isInfoEnabled()) - log.info("Eviction in progress [permits=" + permits+ + log.info("Partition cleanup in progress [permits=" + permits+ ", threads=" + threads + ", groups=" + evictionGroupsMap.keySet().size() + - ", remainingPartsToEvict=" + size + "]"); + ", remainingTasks=" + size + "]"); evictionGroupsMap.values().forEach(GroupEvictionContext::showProgress); @@ -265,6 +305,39 @@ private void showProgress() { evictionGrps.forEach(GroupEvictionContext::awaitFinishAll); } + /** + * + */ + private static class TasksStatistics { + /** */ + private int total; + + /** */ + private int inProgress; + + /** + * + */ + void taskAdded() { + total++; + } + + /** + * + */ + void taskStarted() { + inProgress++; + } + + /** + * + */ + void taskFinished() { + total--; + inProgress--; + } + } + /** * */ @@ -272,26 +345,29 @@ private class GroupEvictionContext implements EvictionContext { /** */ private final CacheGroupContext grp; - /** Deduplicate set partition ids. */ - private final Set partIds = new HashSet<>(); + /** Deduplicate set partition tasks. */ + private final Set taskIds = new HashSet<>(); /** Future for currently running partition eviction task. */ - private final Map> partsEvictFutures = new ConcurrentHashMap<>(); + private final Map> taskFutures = new ConcurrentHashMap<>(); /** Flag indicates that eviction process has stopped for this group. */ private volatile boolean stop; - /** Total partition to evict. 
*/ + /** Total tasks. */ private AtomicInteger totalTasks = new AtomicInteger(); - /** Total partition evict in progress. */ - private int taskInProgress; + /** */ + private Map stats = U.newHashMap(TaskType.VALS.length); /** * @param grp Group context. */ private GroupEvictionContext(CacheGroupContext grp) { this.grp = grp; + + for (TaskType type : TaskType.VALS) + stats.put(type, new TasksStatistics()); } /** {@inheritDoc} */ @@ -299,29 +375,38 @@ private GroupEvictionContext(CacheGroupContext grp) { return stop || sharedEvictionCtx.shouldStop(); } + /** + * @param task Task. + */ + void taskAdded(AbstractEvictionTask task) { + totalTasks.incrementAndGet(); + + synchronized (this) { + stats.get(task.id.type).taskAdded(); + } + } + /** * * @param task Partition eviction task. */ - private synchronized void taskScheduled(PartitionEvictionTask task) { + private synchronized void taskScheduled(AbstractEvictionTask task) { if (shouldStop()) return; - taskInProgress++; + stats.get(task.id.type).taskStarted(); GridFutureAdapter fut = task.finishFut; - int partId = task.part.id(); + taskIds.remove(task.id); - partIds.remove(partId); - - partsEvictFutures.put(partId, fut); + taskFutures.put(task.id, fut); fut.listen(f -> { synchronized (this) { - taskInProgress--; + stats.get(task.id.type).taskFinished(); - partsEvictFutures.remove(partId, f); + taskFutures.remove(task.id, f); if (totalTasks.decrementAndGet() == 0) evictionGroupsMap.remove(grp.groupId()); @@ -340,7 +425,7 @@ private void stop() { * Await evict finish. */ private void awaitFinishAll(){ - partsEvictFutures.forEach(this::awaitFinish); + taskFutures.forEach(this::awaitFinish); evictionGroupsMap.remove(grp.groupId()); } @@ -348,17 +433,17 @@ private void awaitFinishAll(){ /** * Await evict finish partition. 
*/ - private void awaitFinish(Integer part, IgniteInternalFuture fut) { + private void awaitFinish(TaskId taskId, IgniteInternalFuture fut) { // Wait for last offered partition eviction completion try { - log.info("Await partition evict, grpName=" + grp.cacheOrGroupName() + - ", grpId=" + grp.groupId() + ", partId=" + part); + log.info("Await partition cleanup [grpName=" + grp.cacheOrGroupName() + + ", grpId=" + grp.groupId() + ", task=" + taskId.type + ", partId=" + taskId.part + ']'); fut.get(); } catch (IgniteCheckedException e) { if (log.isDebugEnabled()) - log.warning("Failed to await partition eviction during stopping.", e); + log.warning("Failed to await partition cleanup during stopping.", e); } } @@ -366,47 +451,132 @@ private void awaitFinish(Integer part, IgniteInternalFuture fut) { * Shows progress group of eviction. */ private void showProgress() { - if (log.isInfoEnabled()) - log.info("Group eviction in progress [grpName=" + grp.cacheOrGroupName()+ - ", grpId=" + grp.groupId() + - ", remainingPartsToEvict=" + (totalTasks.get() - taskInProgress) + - ", partsEvictInProgress=" + taskInProgress + - ", totalParts=" + grp.topology().localPartitions().size() + "]"); + if (log.isInfoEnabled()) { + StringBuilder msg = new StringBuilder( + "Group cleanup in progress [grpName=" + grp.cacheOrGroupName() + ", grpId=" + grp.groupId()); + + synchronized (this) { + TasksStatistics evicts = stats.get(TaskType.EVICT); + if (evicts.total > 0) { + msg.append(", remainingPartsToEvict=" + (evicts.total - evicts.inProgress)). + append(", partsEvictInProgress=" + evicts.inProgress); + } + + TasksStatistics tombstones = stats.get(TaskType.CLEAR_TOMBSTONES); + if (tombstones.total > 0) { + msg.append(", remainingPartsToClearTombstones=" + (tombstones.total - tombstones.inProgress)). 
+ append(", tombstoneClearInProgress=" + tombstones.inProgress); + } + } + + msg.append(", totalParts=" + grp.topology().localPartitions().size() + "]"); + + log.info(msg.toString()); + } } } /** - * Task for self-scheduled partition eviction / clearing. + * + */ + private enum TaskType { + /** */ + EVICT, + + /** */ + CLEAR_TOMBSTONES; + + /** */ + private static TaskType[] VALS = values(); + } + + /** + * */ - class PartitionEvictionTask implements Runnable { + private static class TaskId { + /** */ + final int part; + + /** */ + final TaskType type; + + /** + * @param part Partiotion id. + * @param type Task type. + */ + TaskId(int part, TaskType type) { + this.part = part; + this.type = type; + } + + /** {@inheritDoc} */ + @Override public boolean equals(Object o) { + if (this == o) + return true; + + if (o == null || getClass() != o.getClass()) + return false; + + TaskId taskKey = (TaskId)o; + + return part == taskKey.part && type == taskKey.type; + } + + /** {@inheritDoc} */ + @Override public int hashCode() { + return Objects.hash(part, type); + } + } + + /** + * + */ + abstract class AbstractEvictionTask implements Runnable { /** Partition to evict. */ - private final GridDhtLocalPartition part; + protected final GridDhtLocalPartition part; /** */ - private final long size; + protected final long size; /** Eviction context. */ - private final GroupEvictionContext grpEvictionCtx; + protected final GroupEvictionContext grpEvictionCtx; /** */ - private final GridFutureAdapter finishFut = new GridFutureAdapter<>(); + protected final GridFutureAdapter finishFut = new GridFutureAdapter<>(); + + /** */ + private final TaskId id; /** * @param part Partition. * @param grpEvictionCtx Eviction context. 
*/ - private PartitionEvictionTask( - GridDhtLocalPartition part, - GroupEvictionContext grpEvictionCtx + private AbstractEvictionTask( + GridDhtLocalPartition part, + GroupEvictionContext grpEvictionCtx, + TaskType type ) { this.part = part; this.grpEvictionCtx = grpEvictionCtx; + id = new TaskId(part.id(), type); + size = part.fullSize(); } + /** + * @return {@code False} if need retry task later. + * @throws IgniteCheckedException If failed. + */ + abstract boolean run0() throws IgniteCheckedException; + + /** + * + */ + abstract void scheduleRetry(); + /** {@inheritDoc} */ - @Override public void run() { + @Override public final void run() { if (grpEvictionCtx.shouldStop()) { finishFut.onDone(); @@ -414,12 +584,7 @@ private PartitionEvictionTask( } try { - boolean success = part.tryClear(grpEvictionCtx); - - if (success) { - if (part.state() == GridDhtPartitionState.EVICTED && part.markForDestroy()) - part.destroy(); - } + boolean success = run0(); // Complete eviction future before schedule new to prevent deadlock with // simultaneous eviction stopping and scheduling new eviction. @@ -427,7 +592,7 @@ private PartitionEvictionTask( // Re-offer partition if clear was unsuccessful due to partition reservation. if (!success) - evictPartitionAsync(grpEvictionCtx.grp, part); + scheduleRetry(); } catch (Throwable ex) { finishFut.onDone(ex); @@ -438,7 +603,7 @@ private PartitionEvictionTask( true); } else { - LT.error(log, ex, "Partition eviction failed, this can cause grid hang."); + LT.error(log, ex, "Partition eviction failed."); cctx.kernalContext().failure().process(new FailureContext(SYSTEM_WORKER_TERMINATION, ex)); } @@ -446,16 +611,81 @@ private PartitionEvictionTask( } } + /** + * Task for self-scheduled partition eviction / clearing. + */ + class PartitionEvictionTask extends AbstractEvictionTask { + /** + * @param part Partition. + * @param grpEvictionCtx Eviction context. 
+ */ + private PartitionEvictionTask( + GridDhtLocalPartition part, + GroupEvictionContext grpEvictionCtx + ) { + super(part, grpEvictionCtx, TaskType.EVICT); + } + + /** {@inheritDoc} */ + @Override void scheduleRetry() { + evictPartitionAsync(grpEvictionCtx.grp, part); + } + + /** {@inheritDoc} */ + @Override public boolean run0() throws IgniteCheckedException { + assert part.state() != GridDhtPartitionState.OWNING : part; + + boolean success = part.tryClear(grpEvictionCtx); + + assert part.state() != GridDhtPartitionState.OWNING : part; + + if (success) { + if (part.state() == GridDhtPartitionState.EVICTED && part.markForDestroy()) + part.destroy(); + } + + return success; + } + } + + /** + * + */ + class ClearTombstonesTask extends AbstractEvictionTask { + /** + * @param part Partition. + * @param grpEvictionCtx Eviction context. + */ + private ClearTombstonesTask( + GridDhtLocalPartition part, + GroupEvictionContext grpEvictionCtx + ) { + super(part, grpEvictionCtx, TaskType.CLEAR_TOMBSTONES); + } + + /** {@inheritDoc} */ + @Override void scheduleRetry() { + throw new UnsupportedOperationException(); + } + + /** {@inheritDoc} */ + @Override public boolean run0() throws IgniteCheckedException { + part.clearTombstones(grpEvictionCtx); + + return true; + } + } + /** * */ class BucketQueue { + /** Queues contains partitions scheduled for eviction. */ + final Queue[] buckets; + /** */ private final long[] bucketSizes; - /** Queues contains partitions scheduled for eviction. */ - final Queue[] buckets; - /** * @param buckets Number of buckets. */ @@ -474,8 +704,8 @@ class BucketQueue { * @param bucket Bucket index. * @return Partition evict task, or {@code null} if bucket queue is empty. 
*/ - PartitionEvictionTask poll(int bucket) { - PartitionEvictionTask task = buckets[bucket].poll(); + AbstractEvictionTask poll(int bucket) { + AbstractEvictionTask task = buckets[bucket].poll(); if (task != null) bucketSizes[bucket] -= task.size; @@ -488,7 +718,7 @@ PartitionEvictionTask poll(int bucket) { * * @return Partition evict task. */ - PartitionEvictionTask pollAny() { + AbstractEvictionTask pollAny() { for (int bucket = 0; bucket < bucketSizes.length; bucket++){ if (!buckets[bucket].isEmpty()) return poll(bucket); @@ -503,7 +733,7 @@ PartitionEvictionTask pollAny() { * @param task Eviction task. * @return Bucket index. */ - int offer(PartitionEvictionTask task) { + int offer(AbstractEvictionTask task) { int bucket = calculateBucket(); buckets[bucket].offer(task); @@ -527,7 +757,7 @@ boolean isEmpty(){ int size(){ int size = 0; - for (Queue queue : buckets) + for (Queue queue : buckets) size += queue.size(); return size; @@ -557,7 +787,7 @@ private int calculateBucket() { * * @return Queue for evict partitions. */ - private Queue createEvictPartitionQueue() { + private Queue createEvictPartitionQueue() { switch (QUEUE_TYPE) { case 1: return new PriorityBlockingQueue<>( diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRow.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRow.java index 746b94aa4d805..0b7c4acd3631f 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRow.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRow.java @@ -59,6 +59,11 @@ public interface CacheDataRow extends MvccUpdateVersionAware, CacheSearchRow, St */ public void key(KeyCacheObject key); + /** + * @param cacheId Cache ID. 
+ */ + public void cacheId(int cacheId); + /** {@inheritDoc} */ @Override public default IOVersions ioVersions() { return DataPageIO.VERSIONS; diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRowAdapter.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRowAdapter.java index 06e7214056e4b..a3b876d76510b 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRowAdapter.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CacheDataRowAdapter.java @@ -52,6 +52,7 @@ import static org.apache.ignite.internal.processors.cache.mvcc.MvccUtils.MVCC_OP_COUNTER_NA; import static org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.RowData.KEY_ONLY; import static org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.RowData.LINK_WITH_HEADER; +import static org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.RowData.TOMBSTONES; /** * Cache data row adapter. @@ -351,9 +352,7 @@ private IncompleteObject readIncomplete( buf.position(off); buf.limit(off + payloadSize); - boolean keyOnly = rowData == RowData.KEY_ONLY; - - incomplete = readFragment(sharedCtx, coctx, buf, keyOnly, readCacheId, incomplete, skipVer); + incomplete = readFragment(sharedCtx, coctx, buf, rowData, readCacheId, incomplete, skipVer); if (incomplete != null) incomplete.setNextLink(nextLink); @@ -379,7 +378,7 @@ protected int readHeader(GridCacheSharedContext sharedCtx, long addr, int * @param sharedCtx Cache shared context. * @param coctx Cache object context. * @param buf Buffer. - * @param keyOnly {@code true} If need to read only key object. + * @param rowData Required row data. * @param readCacheId {@code true} If need to read cache ID. * @param incomplete Incomplete object. * @param skipVer Whether version read should be skipped. 
@@ -390,11 +389,13 @@ protected IncompleteObject readFragment( GridCacheSharedContext sharedCtx, CacheObjectContext coctx, ByteBuffer buf, - boolean keyOnly, + RowData rowData, boolean readCacheId, IncompleteObject incomplete, boolean skipVer ) throws IgniteCheckedException { + boolean tombstones = rowData == TOMBSTONES; + if (readCacheId && cacheId == 0) { incomplete = readIncompleteCacheId(buf, incomplete); @@ -416,6 +417,12 @@ protected IncompleteObject readFragment( // Read key. if (key == null) { + if (tombstones && sharedCtx.database().isTombstone(buf, key, (IncompleteCacheObject)incomplete) == Boolean.FALSE) { + verReady = true; + + return null; + } + incomplete = readIncompleteKey(coctx, buf, (IncompleteCacheObject)incomplete); if (key == null) { @@ -423,7 +430,7 @@ protected IncompleteObject readFragment( return incomplete; // Need to finish reading the key. } - if (keyOnly) + if (rowData == RowData.KEY_ONLY) return null; // Key is ready - we are done! incomplete = null; @@ -442,6 +449,13 @@ protected IncompleteObject readFragment( // Read value. if (val == null) { + if (tombstones && sharedCtx.database().isTombstone(buf, key, (IncompleteCacheObject)incomplete) == Boolean.FALSE) { + key = null; + verReady = true; + + return null; + } + incomplete = readIncompleteValue(coctx, buf, (IncompleteCacheObject)incomplete); if (val == null) { @@ -452,6 +466,14 @@ protected IncompleteObject readFragment( incomplete = null; } + if (tombstones && !sharedCtx.database().isTombstone(this)) { + key = null; + val = null; + verReady = true; + + return null; + } + // Read version. if (!verReady) { incomplete = readIncompleteVersion(buf, incomplete, skipVer); @@ -498,6 +520,14 @@ protected void readFullRow( int len = PageUtils.getInt(addr, off); off += 4; + boolean tombstones = rowData == RowData.TOMBSTONES; + + if (tombstones && !sharedCtx.database().isTombstone(addr + off + len + 1)) { + verReady = true; // Mark as ready, no need to read any data. 
+ + return; + } + if (rowData != RowData.NO_KEY && rowData != RowData.NO_KEY_WITH_HINTS) { byte type = PageUtils.getByte(addr, off); off++; @@ -519,10 +549,13 @@ protected void readFullRow( byte type = PageUtils.getByte(addr, off); off++; - byte[] bytes = PageUtils.getBytes(addr, off, len); - off += len; + if (!tombstones) { + byte[] bytes = PageUtils.getBytes(addr, off, len); + + val = coctx.kernalContext().cacheObjects().toCacheObject(coctx, type, bytes); + } - val = coctx.kernalContext().cacheObjects().toCacheObject(coctx, type, bytes); + off += len; int verLen; @@ -820,6 +853,11 @@ public boolean isReady() { return cacheId; } + /** {@inheritDoc} */ + @Override public void cacheId(int cacheId) { + this.cacheId = cacheId; + } + /** {@inheritDoc} */ @Override public CacheObject value() { assert val != null : "Value is not ready: " + this; @@ -936,7 +974,10 @@ public enum RowData { FULL_WITH_HINTS, /** Force instant hints actualization for update operation with history (to avoid races with vacuum). */ - NO_KEY_WITH_HINTS + NO_KEY_WITH_HINTS, + + /** Do not read row data for non-tombstone entries. 
*/ + TOMBSTONES } /** {@inheritDoc} */ diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java index b3213217b5d5c..f787e6e937c1f 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java @@ -52,7 +52,7 @@ import org.apache.ignite.internal.pagemem.wal.record.RollbackRecord; import org.apache.ignite.internal.pagemem.wal.record.WALRecord; import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageInitRecord; -import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecordV2; +import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecordV3; import org.apache.ignite.internal.pagemem.wal.record.delta.PartitionDestroyRecord; import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion; import org.apache.ignite.internal.processors.cache.CacheDiagnosticManager; @@ -297,14 +297,10 @@ private void saveStoreMetadata( if (rowStore0 != null) { ((CacheFreeList)rowStore0.freeList()).saveMetadata(grp.statisticsHolderData()); - PartitionMetaStorage partStore = store.partStorage(); - long updCntr = store.updateCounter(); long size = store.fullSize(); long rmvId = globalRemoveId().get(); - byte[] updCntrsBytes = store.partUpdateCounter().getBytes(); - PageMemoryEx pageMem = (PageMemoryEx)grp.dataRegion().pageMemory(); IgniteWriteAheadLogManager wal = this.ctx.wal(); @@ -329,6 +325,9 @@ private void saveStoreMetadata( return; } + assert state != null || grp.isLocal() : "Partition state is undefined " + + "[grp=" + grp.cacheOrGroupName() + ", part=" + part + "]"; + int grpId = grp.groupId(); long partMetaId = 
pageMem.partitionMetaPageId(grpId, store.partId()); @@ -349,126 +348,29 @@ private void saveStoreMetadata( try { PagePartitionMetaIOV2 io = PageIO.getPageIO(partMetaPageAddr); - long link = io.getGapsLink(partMetaPageAddr); - - if (updCntrsBytes == null && link != 0) { - partStore.removeDataRowByLink(link, grp.statisticsHolderData()); - - io.setGapsLink(partMetaPageAddr, (link = 0)); - - changed = true; - } - else if (updCntrsBytes != null && link == 0) { - SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes); - - partStore.insertDataRow(row, grp.statisticsHolderData()); - - io.setGapsLink(partMetaPageAddr, (link = row.link())); - - changed = true; - } - else if (updCntrsBytes != null && link != 0) { - byte[] prev = partStore.readRow(link); - - assert prev != null : "Read null gaps using link=" + link; - - if (!Arrays.equals(prev, updCntrsBytes)) { - partStore.removeDataRowByLink(link, grp.statisticsHolderData()); - - SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes); - - partStore.insertDataRow(row, grp.statisticsHolderData()); - - io.setGapsLink(partMetaPageAddr, (link = row.link())); - - changed = true; - } - } - - if (changed) - partStore.saveMetadata(grp.statisticsHolderData()); - + changed |= io.setPartitionState(partMetaPageAddr, state != null ? 
(byte)state.ordinal() : -1); changed |= io.setUpdateCounter(partMetaPageAddr, updCntr); changed |= io.setGlobalRemoveId(partMetaPageAddr, rmvId); changed |= io.setSize(partMetaPageAddr, size); + changed |= io.setTombstonesCount(partMetaPageAddr, store.tombstonesCount()); + changed |= savePartitionUpdateCounterGaps(store, io, partMetaPageAddr); + changed |= saveCacheSizes(store, io, partMetaPageAddr); - if (state != null) - changed |= io.setPartitionState(partMetaPageAddr, (byte)state.ordinal()); - else - assert grp.isLocal() : grp.cacheOrGroupName(); - - long cntrsPageId; - - if (grp.sharedGroup()) { - long initCntrPageId = io.getCountersPageId(partMetaPageAddr); - - Map newSizes = store.cacheSizes(); - Map prevSizes = readSharedGroupCacheSizes(pageMem, grpId, initCntrPageId); - - if (prevSizes != null && prevSizes.equals(newSizes)) - cntrsPageId = initCntrPageId; // Preventing modification of sizes pages for store - else { - cntrsPageId = writeSharedGroupCacheSizes(pageMem, grpId, initCntrPageId, - store.partId(), newSizes); - - if (initCntrPageId == 0 && cntrsPageId != 0) { - io.setCountersPageId(partMetaPageAddr, cntrsPageId); - - changed = true; - } - } - } - else - cntrsPageId = 0L; - - int pageCnt; - - if (needSnapshot) { - pageCnt = this.ctx.pageStore().pages(grpId, store.partId()); - - io.setCandidatePageCount(partMetaPageAddr, size == 0 ? 
0 : pageCnt); - - if (state == OWNING) { - assert part != null; - - if (!addPartition( - part, - ctx.partitionStatMap(), - partMetaPageAddr, - io, - grpId, - store.partId(), - this.ctx.pageStore().pages(grpId, store.partId()), - store.fullSize() - )) - U.warn(log, "Partition was concurrently evicted grpId=" + grpId + - ", partitionId=" + part.id()); - } - else if (state == MOVING || state == RENTING) { - if (ctx.partitionStatMap().forceSkipIndexPartition(grpId)) { - if (log.isInfoEnabled()) - log.info("Will not include SQL indexes to snapshot because there is " + - "a partition not in " + OWNING + " state [grp=" + grp.cacheOrGroupName() + - ", partId=" + store.partId() + ", state=" + state + ']'); - } - } - - changed = true; - } - else - pageCnt = io.getCandidatePageCount(partMetaPageAddr); + if (needSnapshot) + changed |= savePagesCount(ctx, part, store, io, partMetaPageAddr); if (changed && PageHandler.isWalDeltaRecordNeeded(pageMem, grpId, partMetaId, partMetaPage, wal, null)) - wal.log(new MetaPageUpdatePartitionDataRecordV2( + wal.log(new MetaPageUpdatePartitionDataRecordV3( grpId, partMetaId, updCntr, rmvId, (int)size, // TODO: Partition size may be long - cntrsPageId, - state == null ? -1 : (byte)state.ordinal(), - pageCnt, - link + io.getCacheSizesPageId(partMetaPageAddr), + io.getPartitionState(partMetaPageAddr), + io.getCandidatePageCount(partMetaPageAddr), + io.getGapsLink(partMetaPageAddr), + io.getTombstonesCount(partMetaPageAddr) )); } finally { @@ -486,6 +388,158 @@ else if (needSnapshot) tryAddEmptyPartitionToSnapshot(store, ctx); } + /** + * Saves to partition meta page information about partition update counter gaps. + * + * @param store Partition data store. + * @param io I/O for partition meta page. + * @param partMetaPageAddr Partition meta page address. + * @return {@code True} if partition meta data is changed. + * @throws IgniteCheckedException If failed. 
+ */ + private boolean savePartitionUpdateCounterGaps( + CacheDataStore store, + PagePartitionMetaIOV2 io, + long partMetaPageAddr + ) throws IgniteCheckedException { + PartitionMetaStorage partStore = store.partStorage(); + + byte[] updCntrsBytes = store.partUpdateCounter().getBytes(); + + long gapsLink = io.getGapsLink(partMetaPageAddr); + + boolean changed = false; + + if (updCntrsBytes == null && gapsLink != 0) { + partStore.removeDataRowByLink(gapsLink, grp.statisticsHolderData()); + + io.setGapsLink(partMetaPageAddr, 0); + + changed = true; + } + else if (updCntrsBytes != null && gapsLink == 0) { + SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes); + + partStore.insertDataRow(row, grp.statisticsHolderData()); + + io.setGapsLink(partMetaPageAddr, row.link()); + + changed = true; + } + else if (updCntrsBytes != null && gapsLink != 0) { + byte[] prev = partStore.readRow(gapsLink); + + assert prev != null : "Read null gaps using link=" + gapsLink; + + if (!Arrays.equals(prev, updCntrsBytes)) { + partStore.removeDataRowByLink(gapsLink, grp.statisticsHolderData()); + + SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes); + + partStore.insertDataRow(row, grp.statisticsHolderData()); + + io.setGapsLink(partMetaPageAddr, row.link()); + + changed = true; + } + } + + if (changed) + partStore.saveMetadata(grp.statisticsHolderData()); + + return changed; + } + + /** + * Saves to partition meta page information about logical cache sizes inside cache group. + * + * @param store Partition data store. + * @param io I/O for partition meta page. + * @param partMetaPageAddr Partition meta page address. + * @return {@code True} if partition meta data is changed. + * @throws IgniteCheckedException If failed. 
+ */ + private boolean saveCacheSizes( + CacheDataStore store, + PagePartitionMetaIOV2 io, + long partMetaPageAddr + ) throws IgniteCheckedException { + if (grp.sharedGroup()) { + PageMemoryEx pageMem = (PageMemoryEx)grp.dataRegion().pageMemory(); + + long oldCacheSizesPageId = io.getCacheSizesPageId(partMetaPageAddr); + + Map newSizes = store.cacheSizes(); + Map prevSizes = readSharedGroupCacheSizes(pageMem, grp.groupId(), oldCacheSizesPageId); + + if (prevSizes == null || !prevSizes.equals(newSizes)) { + long cacheSizesPageId = writeSharedGroupCacheSizes(pageMem, grp.groupId(), oldCacheSizesPageId, + store.partId(), newSizes); + + if (oldCacheSizesPageId == 0 && cacheSizesPageId != 0) { + io.setSizesPageId(partMetaPageAddr, cacheSizesPageId); + + return true; + } + } + } + else + io.setSizesPageId(partMetaPageAddr, 0); + + return false; + } + + /** + * Saves to partition meta page information about pages count. + * + * @param ctx Checkpoint context. + * @param part Partition. + * @param store Partition data store. + * @param io I/O for partition meta page. + * @param partMetaPageAddr Partition meta page address. + * @return {@code True} if partition meta data is changed. + * @throws IgniteCheckedException If failed. + */ + private boolean savePagesCount( + Context ctx, + GridDhtLocalPartition part, + CacheDataStore store, + PagePartitionMetaIOV2 io, + long partMetaPageAddr + ) throws IgniteCheckedException { + int grpId = grp.groupId(); + int pageCnt = this.ctx.pageStore().pages(grpId, store.partId()); + + io.setCandidatePageCount(partMetaPageAddr, io.getSize(partMetaPageAddr) == 0 ? 
0 : pageCnt); + + if (part.state() == OWNING) { + assert part != null; + + if (!addPartition( + part, + ctx.partitionStatMap(), + partMetaPageAddr, + io, + grpId, + store.partId(), + this.ctx.pageStore().pages(grp.groupId(), store.partId()), + store.fullSize() + )) + U.warn(log, "Partition was concurrently evicted grpId=" + grpId + + ", partitionId=" + part.id()); + } + else if (part.state() == MOVING || part.state() == RENTING) { + if (ctx.partitionStatMap().forceSkipIndexPartition(grpId)) { + if (log.isInfoEnabled()) + log.info("Will not include SQL indexes to snapshot because there is " + + "a partition not in " + OWNING + " state [grp=" + grp.cacheOrGroupName() + + ", partId=" + store.partId() + ", state=" + part.state() + ']'); + } + } + + return true; + } + /** {@inheritDoc} */ @Override public long restorePartitionStates(Map partitionRecoveryStates) throws IgniteCheckedException { if (grp.isLocal() || !grp.affinityNode() || !grp.dataRegion().config().isPersistenceEnabled()) @@ -647,11 +701,13 @@ private GridDhtLocalPartition getPartition(CacheDataStore store) { * return null if counter page does not exist. * @throws IgniteCheckedException If page memory operation failed. 
*/ - @Nullable private static Map readSharedGroupCacheSizes(PageSupport pageMem, int grpId, - long cntrsPageId) throws IgniteCheckedException { - + private static Map readSharedGroupCacheSizes( + PageSupport pageMem, + int grpId, + long cntrsPageId + ) throws IgniteCheckedException { if (cntrsPageId == 0L) - return null; + return Collections.emptyMap(); Map cacheSizes = new HashMap<>(); @@ -684,6 +740,7 @@ private GridDhtLocalPartition getPartition(CacheDataStore store) { pageMem.releasePage(grpId, curId, curPage); } } + return cacheSizes; } @@ -1489,6 +1546,11 @@ private DataEntryRow(DataEntry entry) { return entry.cacheId(); } + /** {@inheritDoc} */ + @Override public void cacheId(int cacheId) { + throw new UnsupportedOperationException(); + } + /** {@inheritDoc} */ @Override public long mvccCoordinatorVersion() { return 0; // TODO IGNITE-7384 @@ -1822,16 +1884,19 @@ private CacheDataStore init0(boolean checkExists) throws IgniteCheckedException if (PageIO.getType(pageAddr) != 0) { PagePartitionMetaIOV2 io = (PagePartitionMetaIOV2)PagePartitionMetaIO.VERSIONS.latest(); - Map cacheSizes = null; - - if (grp.sharedGroup()) - cacheSizes = readSharedGroupCacheSizes(pageMem, grpId, io.getCountersPageId(pageAddr)); - - long link = io.getGapsLink(pageAddr); + long gapsLink = io.getGapsLink(pageAddr); - byte[] data = link == 0 ? null : partStorage.readRow(link); + byte[] updCntrGapsData = gapsLink == 0 ? null : partStorage.readRow(gapsLink); - delegate0.restoreState(io.getSize(pageAddr), io.getUpdateCounter(pageAddr), cacheSizes, data); + delegate0.restoreState( + io.getSize(pageAddr), + io.getUpdateCounter(pageAddr), + grp.sharedGroup() + ? 
readSharedGroupCacheSizes(pageMem, grpId, io.getCacheSizesPageId(pageAddr)) + : Collections.emptyMap(), + updCntrGapsData, + io.getTombstonesCount(pageAddr) + ); globalRemoveId().setIfGreater(io.getGlobalRemoveId(pageAddr)); } @@ -2430,6 +2495,20 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { delegate.remove(cctx, key, partId); } + /** {@inheritDoc} */ + @Override public void removeWithTombstone( + GridCacheContext cctx, + KeyCacheObject key, + GridCacheVersion ver, + GridDhtLocalPartition part + ) throws IgniteCheckedException { + assert ctx.database().checkpointLockIsHeldByThread(); + + CacheDataStore delegate = init0(false); + + delegate.removeWithTombstone(cctx, key, ver, part); + } + /** {@inheritDoc} */ @Override public CacheDataRow find(GridCacheContext cctx, KeyCacheObject key) throws IgniteCheckedException { CacheDataStore delegate = init0(true); @@ -2464,7 +2543,7 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { /** {@inheritDoc} */ @Override public GridCursor mvccAllVersionsCursor(GridCacheContext cctx, - KeyCacheObject key, Object x) throws IgniteCheckedException { + KeyCacheObject key, CacheDataRowAdapter.RowData x) throws IgniteCheckedException { CacheDataStore delegate = init0(true); if (delegate != null) @@ -2475,17 +2554,17 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { /** {@inheritDoc} */ - @Override public GridCursor cursor() throws IgniteCheckedException { + @Override public GridCursor cursor(boolean withTombstones) throws IgniteCheckedException { CacheDataStore delegate = init0(true); if (delegate != null) - return delegate.cursor(); + return delegate.cursor(withTombstones); return EMPTY_CURSOR; } /** {@inheritDoc} */ - @Override public GridCursor cursor(Object x) throws IgniteCheckedException { + @Override public GridCursor cursor(CacheDataRowAdapter.RowData x) throws IgniteCheckedException { CacheDataStore delegate = init0(true); if (delegate != 
null) @@ -2522,7 +2601,7 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { @Override public GridCursor cursor(int cacheId, KeyCacheObject lower, KeyCacheObject upper, - Object x) + CacheDataRowAdapter.RowData x) throws IgniteCheckedException { CacheDataStore delegate = init0(true); @@ -2536,13 +2615,14 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { @Override public GridCursor cursor(int cacheId, KeyCacheObject lower, KeyCacheObject upper, - Object x, - MvccSnapshot mvccSnapshot) + CacheDataRowAdapter.RowData x, + MvccSnapshot mvccSnapshot, + boolean withTombstones) throws IgniteCheckedException { CacheDataStore delegate = init0(true); if (delegate != null) - return delegate.cursor(cacheId, lower, upper, x, mvccSnapshot); + return delegate.cursor(cacheId, lower, upper, x, mvccSnapshot, withTombstones); return EMPTY_CURSOR; } @@ -2553,11 +2633,11 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { } /** {@inheritDoc} */ - @Override public GridCursor cursor(int cacheId) throws IgniteCheckedException { + @Override public GridCursor cursor(int cacheId, boolean withTombstones) throws IgniteCheckedException { CacheDataStore delegate = init0(true); if (delegate != null) - return delegate.cursor(cacheId); + return delegate.cursor(cacheId, withTombstones); return EMPTY_CURSOR; } @@ -2768,9 +2848,25 @@ private int purgeExpiredInternal( } } + /** {@inheritDoc} */ @Override public PartitionMetaStorage partStorage() { return partStorage; } + + /** {@inheritDoc} */ + @Override public long tombstonesCount() { + try { + CacheDataStore delegate0 = init0(true); + + if (delegate0 == null) + return 0; + + return delegate0.tombstonesCount(); + } + catch (IgniteCheckedException e) { + throw new IgniteException(e); + } + } } /** diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java index a1a7913785d27..48a7f2087bfa4 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java @@ -17,8 +17,8 @@ package org.apache.ignite.internal.processors.cache.persistence; -import javax.management.InstanceNotFoundException; import java.io.File; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import javax.management.InstanceNotFoundException; import org.apache.ignite.DataRegionMetrics; import org.apache.ignite.DataRegionMetricsProvider; import org.apache.ignite.DataStorageMetrics; @@ -48,12 +49,16 @@ import org.apache.ignite.internal.mem.file.MappedFileMemoryProvider; import org.apache.ignite.internal.mem.unsafe.UnsafeMemoryProvider; import org.apache.ignite.internal.pagemem.PageMemory; +import org.apache.ignite.internal.pagemem.PageUtils; import org.apache.ignite.internal.pagemem.impl.PageMemoryNoStoreImpl; import org.apache.ignite.internal.pagemem.wal.WALPointer; import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion; import org.apache.ignite.internal.processors.cache.CacheGroupContext; +import org.apache.ignite.internal.processors.cache.CacheObject; import org.apache.ignite.internal.processors.cache.GridCacheMapEntry; import org.apache.ignite.internal.processors.cache.GridCacheSharedManagerAdapter; +import org.apache.ignite.internal.processors.cache.IncompleteCacheObject; +import org.apache.ignite.internal.processors.cache.KeyCacheObject; import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture; import org.apache.ignite.internal.processors.cache.persistence.evict.FairFifoPageEvictionTracker; import org.apache.ignite.internal.processors.cache.persistence.evict.NoOpPageEvictionTracker; @@ -136,7 +141,6 @@ public class IgniteCacheDatabaseSharedManager extends GridCacheSharedManagerAdap /** First eviction was warned flag. */ private volatile boolean firstEvictWarn; - /** {@inheritDoc} */ @Override protected void start0() throws IgniteCheckedException { if (cctx.kernalContext().clientNode() && cctx.kernalContext().config().getDataStorageConfiguration() == null) @@ -153,6 +157,102 @@ public class IgniteCacheDatabaseSharedManager extends GridCacheSharedManagerAdap initDataRegions(memCfg); } + /** + * @param row Row. + * @return {@code True} if given row is tombstone. + * @throws IgniteCheckedException If failed. + */ + public boolean isTombstone(@Nullable CacheDataRow row) throws IgniteCheckedException { + if (row == null) + return false; + + CacheObject val = row.value(); + + assert val != null : row; + + return val.cacheObjectType() == CacheObject.TOMBSTONE; + } + + /** + * @param buf Buffer. + * @param key Row key. + * @param incomplete Incomplete object. + * @return Tombstone flag or {@code null} if there is not enough data. + */ + public Boolean isTombstone( + ByteBuffer buf, + @Nullable KeyCacheObject key, + @Nullable IncompleteCacheObject incomplete + ) { + if (key == null) { + if (incomplete == null) { // Did not start read key yet. + if (buf.remaining() < IncompleteCacheObject.HEAD_LEN) + return null; + + int keySize = buf.getInt(buf.position()); + + int headOffset = (IncompleteCacheObject.HEAD_LEN + keySize) /* key */ + + 8 /* expire time */; + + int requiredSize = headOffset + IncompleteCacheObject.HEAD_LEN; // Value header. 
+ + if (buf.remaining() < requiredSize) + return null; + + return isTombstone(buf, headOffset); + } + else { // Reading key, check if there is enough data to check value header. + byte[] data = incomplete.data(); + + if (data == null) // Header is not available yet. + return null; + + int keyRemaining = data.length - incomplete.dataOffset(); + + assert keyRemaining > 0 : keyRemaining; + + int headOffset = keyRemaining + 8 /* expire time */; + + int requiredSize = headOffset + IncompleteCacheObject.HEAD_LEN; // Value header. + + if (buf.remaining() < requiredSize) + return null; + + return isTombstone(buf, headOffset); + } + } + + if (incomplete == null) { // Did not start read value yet. + if (buf.remaining() < IncompleteCacheObject.HEAD_LEN) + return null; + + return isTombstone(buf, 0); + } + + return incomplete.type() == CacheObject.TOMBSTONE; + } + + /** + * @param buf Buffer. + * @param offset Value offset. + * @return Tombstone flag or {@code null} if there is not enough data. + */ + private Boolean isTombstone(ByteBuffer buf, int offset) { + byte valType = buf.get(buf.position() + offset + 4); + + return valType == CacheObject.TOMBSTONE; + } + + /** + * @param addr Row address. + * @return {@code True} if stored value is tombstone. + */ + public boolean isTombstone(long addr) { + byte type = PageUtils.getByte(addr, 4); + + return type == CacheObject.TOMBSTONE; + } + /** + * @param cfg Ignite configuration. + * @param groupName Name of group. 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIO.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIO.java index c86a64a1e44f1..931fd92fbc4dd 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIO.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIO.java @@ -59,7 +59,7 @@ public class PagePartitionMetaIO extends PageMetaIO { setUpdateCounter(pageAddr, 0); setGlobalRemoveId(pageAddr, 0); setPartitionState(pageAddr, (byte)-1); - setCountersPageId(pageAddr, 0); + setSizesPageId(pageAddr, 0); } /** @@ -153,22 +153,22 @@ public boolean setPartitionState(long pageAddr, byte state) { } /** - * Returns partition counters page identifier, page with caches in cache group sizes. + * Returns page identifier related to page with logical cache sizes in cache group. * * @param pageAddr Partition metadata page address. * @return Next meta partial page ID or {@code 0} if it does not exist. */ - public long getCountersPageId(long pageAddr) { + public long getCacheSizesPageId(long pageAddr) { return PageUtils.getLong(pageAddr, NEXT_PART_META_PAGE_OFF); } /** - * Sets new reference to partition counters page (logical cache sizes). + * Sets new reference to page with logical cache sizes in cache group. * * @param pageAddr Partition metadata page address. * @param cntrsPageId New cache sizes page ID. */ - public void setCountersPageId(long pageAddr, long cntrsPageId) { + public void setSizesPageId(long pageAddr, long cntrsPageId) { PageUtils.putLong(pageAddr, NEXT_PART_META_PAGE_OFF, cntrsPageId); } @@ -228,6 +228,23 @@ public boolean setGapsLink(long pageAddr, long link) { "this PagePartitionMetaIO version: ver=" + getVersion()); } + /** + * @param pageAddr Page address. 
+ */ + public long getTombstonesCount(long pageAddr) { + throw new UnsupportedOperationException("Tombstones count is not supported by " + + "this PagePartitionMetaIO version: ver=" + getVersion()); + } + + /** + * @param pageAddr Page address. + * @param tombstonesCount Tombstones count. + */ + public boolean setTombstonesCount(long pageAddr, long tombstonesCount) { + throw new UnsupportedOperationException("Tombstones count is not supported by " + + "this PagePartitionMetaIO version: ver=" + getVersion()); + } + /** {@inheritDoc} */ @Override protected void printPage(long pageAddr, int pageSize, GridStringBuilder sb) throws IgniteCheckedException { super.printPage(pageAddr, pageSize, sb); @@ -238,7 +255,7 @@ public boolean setGapsLink(long pageAddr, long link) { .a(",\n\tupdateCounter=").a(getUpdateCounter(pageAddr)) .a(",\n\tglobalRemoveId=").a(getGlobalRemoveId(pageAddr)) .a(",\n\tpartitionState=").a(state).a("(").a(GridDhtPartitionState.fromOrdinal(state)).a(")") - .a(",\n\tcountersPageId=").a(getCountersPageId(pageAddr)) + .a(",\n\tcacheSizesPageId=").a(getCacheSizesPageId(pageAddr)) .a("\n]"); } } diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIOV2.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIOV2.java index 37b7243b35f4e..e915e0bfb62ac 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIOV2.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/PagePartitionMetaIOV2.java @@ -18,7 +18,6 @@ package org.apache.ignite.internal.processors.cache.persistence.tree.io; -import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.internal.pagemem.PageUtils; import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState; import 
org.apache.ignite.internal.util.GridStringBuilder; @@ -37,6 +36,9 @@ public class PagePartitionMetaIOV2 extends PagePartitionMetaIO { /** */ private static final int GAPS_LINK = PART_META_REUSE_LIST_ROOT_OFF + 8; + /** */ + private static final int TOMBSTONES_COUNT = GAPS_LINK + 8; + /** * @param ver Version. */ @@ -100,7 +102,22 @@ public boolean setGapsLink(long pageAddr, long link) { } /** {@inheritDoc} */ - @Override protected void printPage(long pageAddr, int pageSize, GridStringBuilder sb) throws IgniteCheckedException { + @Override public long getTombstonesCount(long pageAddr) { + return PageUtils.getLong(pageAddr, TOMBSTONES_COUNT); + } + + /** {@inheritDoc} */ + @Override public boolean setTombstonesCount(long pageAddr, long tombstonesCnt) { + if (getTombstonesCount(pageAddr) == tombstonesCnt) + return false; + + PageUtils.putLong(pageAddr, TOMBSTONES_COUNT, tombstonesCnt); + + return true; + } + + /** {@inheritDoc} */ + @Override protected void printPage(long pageAddr, int pageSize, GridStringBuilder sb) { byte state = getPartitionState(pageAddr); sb.a("PagePartitionMeta[\n\ttreeRoot=").a(getReuseListRoot(pageAddr)); @@ -115,8 +132,9 @@ public boolean setGapsLink(long pageAddr, long link) { sb.a(",\n\tupdateCounter=").a(getUpdateCounter(pageAddr)); sb.a(",\n\tglobalRemoveId=").a(getGlobalRemoveId(pageAddr)); sb.a(",\n\tpartitionState=").a(state).a("(").a(GridDhtPartitionState.fromOrdinal(state)).a(")"); - sb.a(",\n\tcountersPageId=").a(getCountersPageId(pageAddr)); + sb.a(",\n\tcacheSizesPageId=").a(getCacheSizesPageId(pageAddr)); sb.a(",\n\tcntrUpdDataPageId=").a(getGapsLink(pageAddr)); + sb.a(",\n\ttombstonesCount=").a(getTombstonesCount(pageAddr)); sb.a("\n]"); } @@ -133,5 +151,6 @@ public void upgradePage(long pageAddr) { setPendingTreeRoot(pageAddr, 0); setPartitionMetaStoreReuseListRoot(pageAddr, 0); setGapsLink(pageAddr, 0); + setTombstonesCount(pageAddr, 0); } } diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/serializer/RecordDataV1Serializer.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/serializer/RecordDataV1Serializer.java index ec6897236eb0d..08e980f9c4252 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/serializer/RecordDataV1Serializer.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/serializer/RecordDataV1Serializer.java @@ -71,6 +71,7 @@ import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdateNextSnapshotId; import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecord; import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecordV2; +import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecordV3; import org.apache.ignite.internal.pagemem.wal.record.delta.NewRootInitRecord; import org.apache.ignite.internal.pagemem.wal.record.delta.PageListMetaResetCountRecord; import org.apache.ignite.internal.pagemem.wal.record.delta.PagesListAddPageRecord; @@ -379,6 +380,10 @@ assert record instanceof PageSnapshot; return /*cache ID*/4 + /*page ID*/8 + /*upd cntr*/8 + /*rmv id*/8 + /*part size*/4 + /*counters page id*/8 + /*state*/ 1 + /*allocatedIdxCandidate*/ 4 + /*link*/ 8; + case PARTITION_META_PAGE_UPDATE_COUNTERS_V3: + return /*cache ID*/4 + /*page ID*/8 + /*upd cntr*/8 + /*rmv id*/8 + /*part size*/4 + /*counters page id*/8 + /*state*/ 1 + + /*allocatedIdxCandidate*/ 4 + /*link*/ 8 + /*tombstones cnt*/ 8; + case MEMORY_RECOVERY: return 8; @@ -611,6 +616,11 @@ WALRecord readPlainRecord(RecordType type, ByteBufferBackedDataInput in, break; + case PARTITION_META_PAGE_UPDATE_COUNTERS_V3: + res = new MetaPageUpdatePartitionDataRecordV3(in); + + break; + case MEMORY_RECOVERY: long ts = in.readLong(); @@ -1202,6 +1212,7 @@ void 
writePlainRecord(WALRecord rec, ByteBuffer buf) throws IgniteCheckedExcepti case PARTITION_META_PAGE_UPDATE_COUNTERS: case PARTITION_META_PAGE_UPDATE_COUNTERS_V2: + case PARTITION_META_PAGE_UPDATE_COUNTERS_V3: ((MetaPageUpdatePartitionDataRecord)rec).toBytes(buf); break; diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/DataRow.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/DataRow.java index add2abe36c4f2..5eb50f8457f2f 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/DataRow.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/DataRow.java @@ -120,10 +120,8 @@ public DataRow() { this.link = link; } - /** - * @param cacheId Cache ID. - */ - public void cacheId(int cacheId) { + /** {@inheritDoc} */ + @Override public void cacheId(int cacheId) { this.cacheId = cacheId; } } diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/metric/impl/MetricUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/metric/impl/MetricUtils.java index fad16b5e95168..d80e33b57ea3a 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/metric/impl/MetricUtils.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/metric/impl/MetricUtils.java @@ -18,6 +18,7 @@ package org.apache.ignite.internal.processors.metric.impl; import java.util.Map; +import org.apache.ignite.internal.processors.cache.CacheGroupMetricsImpl; import org.apache.ignite.internal.processors.metric.GridMetricManager; import org.apache.ignite.internal.processors.metric.MetricRegistry; import org.apache.ignite.internal.util.typedef.T2; @@ -85,6 +86,14 @@ public static String cacheMetricsRegistryName(String cacheName, boolean isNear) return metricName(CACHE_METRICS, cacheName); } + /** + * @param cacheGrpName Cache group name. + * @return Cache group metrics registry name. 
+ */ + public static String cacheGroupMetricsRegistryName(String cacheGrpName) { + return metricName(CacheGroupMetricsImpl.CACHE_GROUP_METRICS_PREFIX, cacheGrpName); + } + /** * Atomically sets the value to the given updated value * if the current value {@code ==} the expected value. diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDeferredDeleteSanitySelfTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDeferredDeleteSanitySelfTest.java index 69a19f47816e0..8c2bd1db35fcf 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDeferredDeleteSanitySelfTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDeferredDeleteSanitySelfTest.java @@ -51,10 +51,10 @@ public void testDeferredDelete() throws Exception { testDeferredDelete(LOCAL, TRANSACTIONAL, false, false); testDeferredDelete(PARTITIONED, ATOMIC, false, true); - testDeferredDelete(PARTITIONED, TRANSACTIONAL, false, true); + testDeferredDelete(PARTITIONED, TRANSACTIONAL, false, false); testDeferredDelete(REPLICATED, ATOMIC, false, true); - testDeferredDelete(REPLICATED, TRANSACTIONAL, false, true); + testDeferredDelete(REPLICATED, TRANSACTIONAL, false, false); // Near testDeferredDelete(LOCAL, ATOMIC, true, false); @@ -64,7 +64,7 @@ public void testDeferredDelete() throws Exception { testDeferredDelete(PARTITIONED, TRANSACTIONAL, true, false); testDeferredDelete(REPLICATED, ATOMIC, true, true); - testDeferredDelete(REPLICATED, TRANSACTIONAL, true, true); + testDeferredDelete(REPLICATED, TRANSACTIONAL, true, false); } /** diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/GridCacheAbstractFullApiSelfTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/GridCacheAbstractFullApiSelfTest.java index 3103f8d0b635f..e0ad965758da6 100644 --- 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/GridCacheAbstractFullApiSelfTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/GridCacheAbstractFullApiSelfTest.java @@ -6718,9 +6718,12 @@ public CheckEntriesDeletedTask(int cnt) { GridCacheContext ctx = ((IgniteKernal)ignite).internalCache(DEFAULT_CACHE_NAME).context(); - GridCacheEntryEx entry = ctx.isNear() ? ctx.near().dht().peekEx(key) : ctx.cache().peekEx(key); + if (ctx.isNear()) + ctx = ctx.near().dht().context(); - if (ignite.affinity(DEFAULT_CACHE_NAME).mapKeyToPrimaryAndBackups(key).contains(((IgniteKernal)ignite).localNode())) { + GridCacheEntryEx entry = ctx.cache().peekEx(key); + + if (ctx.deferredDelete() && ignite.affinity(DEFAULT_CACHE_NAME).mapKeyToPrimaryAndBackups(key).contains(((IgniteKernal)ignite).localNode())) { assertNotNull(entry); assertTrue(entry.deleted()); } diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheConfigVariationsFullApiTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheConfigVariationsFullApiTest.java index b653d01da8b8b..51a8af54a8417 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheConfigVariationsFullApiTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheConfigVariationsFullApiTest.java @@ -6623,7 +6623,7 @@ public CheckEntriesDeletedTask(int cnt, String cacheName) { GridCacheEntryEx entry = ctx.isNear() ? 
ctx.near().dht().peekEx(key) : ctx.cache().peekEx(key); - if (ignite.affinity(cacheName).mapKeyToPrimaryAndBackups(key).contains(((IgniteKernal)ignite).localNode())) { + if (ctx.deferredDelete() && ignite.affinity(cacheName).mapKeyToPrimaryAndBackups(key).contains(((IgniteKernal)ignite).localNode())) { assertNotNull(entry); assertTrue(entry.deleted()); } diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CacheRemoveWithTombstonesLoadTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CacheRemoveWithTombstonesLoadTest.java new file mode 100644 index 0000000000000..d0661534250b9 --- /dev/null +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CacheRemoveWithTombstonesLoadTest.java @@ -0,0 +1,414 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.processors.cache.distributed; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteSystemProperties; +import org.apache.ignite.Ignition; +import org.apache.ignite.cache.CacheWriteSynchronizationMode; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.configuration.WALMode; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.processors.metric.impl.MetricUtils; +import org.apache.ignite.internal.util.typedef.internal.U; +import org.apache.ignite.spi.metric.LongMetric; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.MvccFeatureChecker; +import org.apache.ignite.testframework.junits.WithSystemProperty; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.junit.After; +import org.junit.Assume; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; +import static org.apache.ignite.cache.CacheMode.PARTITIONED; +import static org.apache.ignite.cache.CacheRebalanceMode.SYNC; + +/** + * + */ +@RunWith(Parameterized.class) +public class 
CacheRemoveWithTombstonesLoadTest extends GridCommonAbstractTest { + /** Dummy data. */ + private static final byte[] DUMMY_DATA = {}; + + /** Test parameters. */ + @Parameterized.Parameters(name = "persistenceEnabled={0}, historicalRebalance={1}") + public static Collection parameters() { + List res = new ArrayList<>(); + + for (boolean persistenceEnabled : new boolean[] {false, true}) { + for (boolean histRebalance : new boolean[] {false, true}) { + if (!persistenceEnabled && histRebalance) + continue; + + res.add(new Object[]{persistenceEnabled, histRebalance}); + } + } + + return res; + } + + /** */ + @Parameterized.Parameter(0) + public boolean persistence; + + /** */ + @Parameterized.Parameter(1) + public boolean histRebalance; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(gridName); + + cfg.setConsistentId(gridName); + + DataStorageConfiguration dsCfg = new DataStorageConfiguration(); + + if (persistence) { + dsCfg.setDefaultDataRegionConfiguration( + new DataRegionConfiguration() + .setInitialSize(256L * 1024 * 1024) + .setMaxSize(256L * 1024 * 1024) + .setPersistenceEnabled(true)) + .setWalMode(WALMode.LOG_ONLY); + } + + dsCfg.setPageSize(1024); + + cfg.setDataStorageConfiguration(dsCfg); + + // Throttle rebalance. 
+ cfg.setRebalanceThrottle(100); + + return cfg; + } + + /** + * + */ + @BeforeClass + public static void beforeTests() { + Assume.assumeFalse(MvccFeatureChecker.forcedMvcc()); + } + + /** + * + */ + @Before + public void before() throws Exception { + cleanPersistenceDir(); + + stopAllGrids(); + + if (histRebalance) + System.setProperty(IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD, "0"); + } + + /** + * + */ + @After + public void after() throws Exception { + if (histRebalance) + System.clearProperty(IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD); + + stopAllGrids(); + + cleanPersistenceDir(); + } + + /** + * @throws Exception If failed. + */ + @Test + @WithSystemProperty(key = IgniteSystemProperties.IGNITE_BASELINE_AUTO_ADJUST_ENABLED, value = "false") + public void removeAndRebalance() throws Exception { + IgniteEx ignite0 = startGrid(0); + + IgniteCache cache0; + + final int ADD_NODES = persistence ? 2 : 3; + final int KEYS = persistence ? 5_000 : 10_000; + + if (persistence) { + // Preload initial data to all nodes to have start point for WAL rebalance. 
+ for (int i = 0, idx = 1; i < ADD_NODES; i++, idx++) + startGrid(idx); + + ignite0.cluster().active(true); + + awaitPartitionMapExchange(); + + cache0 = ignite0.getOrCreateCache(cacheConfiguration()); + + for (int k = 0; k < KEYS; k++) + cache0.put(new TestKey(k, DUMMY_DATA), new TestValue(DUMMY_DATA)); + + forceCheckpoint(); + + for (int i = 0, idx = 1; i < ADD_NODES; i++, idx++) { + stopGrid(idx); + + awaitPartitionMapExchange(); + } + } + + final int pageSize = ignite0.configuration().getDataStorageConfiguration().getPageSize(); + + ThreadLocalRandom rnd = ThreadLocalRandom.current(); + + List keys = new ArrayList<>(); + + Map data = new HashMap<>(); + + for (int i = 0; i < KEYS; i++) { + TestKey key = new TestKey(i, new byte[rnd.nextInt(pageSize * 3)]); + + keys.add(key); + + data.put(key, new TestValue(new byte[rnd.nextInt(pageSize * 3)])); + } + + cache0 = ignite0.getOrCreateCache(cacheConfiguration()); + + cache0.putAll(data); + + AtomicInteger nodeIdx = new AtomicInteger(); + + for (int iter = 0; iter < ADD_NODES; iter++) { + IgniteInternalFuture nodeStartFut = GridTestUtils.runAsync(() -> { + int idx = nodeIdx.incrementAndGet(); + + info("Start node: " + idx); + + U.sleep(500); + + return startGrid(idx); + }); + + long endTime = U.currentTimeMillis() + 5_000; + + while (U.currentTimeMillis() < endTime) { + for (int i = 0; i < 100; i++) { + TestKey key = keys.get(rnd.nextInt(keys.size())); + + if (rnd.nextBoolean()) { + cache0.remove(key); + + data.remove(key); + } + else { + TestValue val = new TestValue(new byte[rnd.nextInt(pageSize * 3)]); + + cache0.put(key, val); + data.put(key, val); + } + + U.sleep(10); + } + } + + nodeStartFut.get(30_000); + + checkData(keys, data); + + waitTombstoneCleanup(); + + checkData(keys, data); + } + + awaitPartitionMapExchange(); + + for (int iter = 0; iter < ADD_NODES; iter++) { + IgniteInternalFuture nodeStopFut = GridTestUtils.runAsync(() -> { + int idx = nodeIdx.getAndDecrement(); + + info("Stop node: " + idx); + + 
stopGrid(idx); + + awaitPartitionMapExchange(); + + return null; + }); + + long endTime = U.currentTimeMillis() + 2_500; + + while (U.currentTimeMillis() < endTime) { + for (int i = 0; i < 100; i++) { + TestKey key = keys.get(rnd.nextInt(keys.size())); + + if (rnd.nextBoolean()) { + cache0.remove(key); + + data.remove(key); + } else { + TestValue val = new TestValue(new byte[rnd.nextInt(pageSize * 3)]); + + cache0.put(key, val); + data.put(key, val); + } + } + + U.sleep(10); + } + + nodeStopFut.get(30_000); + + checkData(keys, data); + + waitTombstoneCleanup(); + + checkData(keys, data); + } + } + + /** + * @param keys Keys to check. + * @param data Expected data. + */ + private void checkData(List keys, Map data) { + for (Ignite node : Ignition.allGrids()) { + info("Check node: " + node.name()); + + IgniteCache cache = node.cache(DEFAULT_CACHE_NAME); + + for (TestKey key : keys) { + TestValue expVal = data.get(key); + TestValue val = cache.get(key); + + if (expVal == null) + assertNull(val); + else { + assertNotNull(val); + assertTrue(Arrays.equals(expVal.dummyData, val.dummyData)); + } + } + } + } + + /** + * @throws Exception If failed. + */ + private void waitTombstoneCleanup() throws Exception { + for (Ignite node : Ignition.allGrids()) { + final LongMetric tombstones = ((IgniteEx)node).context().metric().registry( + MetricUtils.cacheGroupMetricsRegistryName(DEFAULT_CACHE_NAME)).findMetric("Tombstones"); + + GridTestUtils.waitForCondition(() -> tombstones.value() == 0, 30_000); + + assertEquals("Failed to wait for tombstone cleanup: " + node.name(), 0, tombstones.value()); + } + } + + /** + * @return Cache configuration. 
+ */ + private CacheConfiguration cacheConfiguration() { + CacheConfiguration ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME); + + ccfg.setAtomicityMode(TRANSACTIONAL); + ccfg.setCacheMode(PARTITIONED); + ccfg.setBackups(2); + ccfg.setRebalanceMode(SYNC); + ccfg.setReadFromBackup(true); + ccfg.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC); + ccfg.setAffinity(new RendezvousAffinityFunction(false, 64)); + + return ccfg; + } + + /** + * + */ + static class TestKey { + /** */ + private final int id; + + /** */ + private final byte[] dummyData; + + /** + * @param id ID. + * @param dummyData Dummy byte array (to test with various key sizes). + */ + public TestKey(int id, byte[] dummyData) { + this.id = id; + this.dummyData = dummyData; + } + + /** {@inheritDoc} */ + @Override public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + TestKey testKey = (TestKey) o; + + return id == testKey.id && Arrays.equals(dummyData, testKey.dummyData); + } + + /** {@inheritDoc} */ + @Override public int hashCode() { + int result = Objects.hash(id); + result = 31 * result + Arrays.hashCode(dummyData); + return result; + } + + /** {@inheritDoc} */ + @Override public String toString() { + return "TestKey [id=" + id + "]"; + } + } + + /** + * + */ + static class TestValue { + /** */ + private final byte[] dummyData; + + /** + * @param dummyData Dummy byte array (to test with various value sizes). 
+ */ + public TestValue(byte[] dummyData) { + this.dummyData = dummyData; + } + } +} diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CacheRemoveWithTombstonesTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CacheRemoveWithTombstonesTest.java new file mode 100644 index 0000000000000..c8b0a7f56f28d --- /dev/null +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CacheRemoveWithTombstonesTest.java @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.processors.cache.distributed; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteDataStreamer; +import org.apache.ignite.IgniteSystemProperties; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.CacheWriteSynchronizationMode; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.configuration.WALMode; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.TestRecordingCommunicationSpi; +import org.apache.ignite.internal.processors.cache.GridCacheGroupIdMessage; +import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage; +import org.apache.ignite.internal.processors.metric.impl.MetricUtils; +import org.apache.ignite.spi.metric.LongMetric; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC; +import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; +import static org.apache.ignite.cache.CacheMode.PARTITIONED; +import static org.apache.ignite.cache.CacheRebalanceMode.ASYNC; + +/** + * + */ +@RunWith(Parameterized.class) +public class CacheRemoveWithTombstonesTest extends GridCommonAbstractTest { + /** Test parameters. 
*/ + @Parameterized.Parameters(name = "persistenceEnabled={0}, historicalRebalance={1}") + public static Collection parameters() { + List res = new ArrayList<>(); + + for (boolean persistenceEnabled : new boolean[] {false, true}) { + for (boolean histRebalance : new boolean[] {false, true}) { + if (!persistenceEnabled && histRebalance) + continue; + + res.add(new Object[]{persistenceEnabled, histRebalance}); + } + } + + return res; + } + + /** */ + @Parameterized.Parameter(0) + public boolean persistence; + + /** */ + @Parameterized.Parameter(1) + public boolean histRebalance; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(gridName); + + TestRecordingCommunicationSpi commSpi = new TestRecordingCommunicationSpi(); + + cfg.setConsistentId(gridName); + + cfg.setCommunicationSpi(commSpi); + + if (persistence) { + DataStorageConfiguration dsCfg = new DataStorageConfiguration() + .setDefaultDataRegionConfiguration( + new DataRegionConfiguration() + .setInitialSize(256L * 1024 * 1024) + .setMaxSize(256L * 1024 * 1024) + .setPersistenceEnabled(true) + ) + .setWalMode(WALMode.LOG_ONLY); + + cfg.setDataStorageConfiguration(dsCfg); + } + + return cfg; + } + + /** + * + */ + @Before + public void before() throws Exception { + stopAllGrids(); + + cleanPersistenceDir(); + + if (histRebalance) + System.setProperty(IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD, "0"); + } + + /** + * + */ + @After + public void after() throws Exception { + if (histRebalance) + System.clearProperty(IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD); + + stopAllGrids(); + + cleanPersistenceDir(); + } + + /** + * @throws Exception If failed. + */ + @Test + public void testRemoveAndRebalanceRaceTx() throws Exception { + testRemoveAndRebalanceRace(TRANSACTIONAL, true); + } + + /** + * @throws Exception If failed. 
+ */ + @Test + public void testRemoveAndRebalanceRaceAtomic() throws Exception { + testRemoveAndRebalanceRace(ATOMIC, false); + } + + /** + * @throws Exception If failed. + * @param expTombstone {@code True} if tombstones should be created. + */ + private void testRemoveAndRebalanceRace(CacheAtomicityMode atomicityMode, boolean expTombstone) throws Exception { + IgniteEx ignite0 = startGrid(0); + + if (histRebalance) + startGrid(1); + + if (persistence) + ignite0.cluster().active(true); + + IgniteCache cache0 = ignite0.createCache(cacheConfiguration(atomicityMode)); + + final int KEYS = histRebalance ? 1024 : 1024 * 256; + + if (histRebalance) { + // Preload initial data to have start point for WAL rebalance. + try (IgniteDataStreamer streamer = ignite0.dataStreamer(DEFAULT_CACHE_NAME)) { + streamer.allowOverwrite(true); + + for (int i = 0; i < KEYS; i++) + streamer.addData(-i, 0); + } + + forceCheckpoint(); + + stopGrid(1); + } + + // This data will be rebalanced. + try (IgniteDataStreamer streamer = ignite0.dataStreamer(DEFAULT_CACHE_NAME)) { + streamer.allowOverwrite(true); + + for (int i = 0; i < KEYS; i++) + streamer.addData(i, i); + } + + blockRebalance(ignite0); + + IgniteEx ignite1 = GridTestUtils.runAsync(() -> startGrid(1)).get(10, TimeUnit.SECONDS); + + if (persistence) { + ignite0.cluster().baselineAutoAdjustEnabled(false); + + ignite0.cluster().setBaselineTopology(2); + } + + TestRecordingCommunicationSpi.spi(ignite0).waitForBlocked(); + + Set keysWithTombstone = new HashSet<>(); + + // Do removes while rebalance is in progress. + // All keys are removed during historical rebalance. + for (int i = 0, step = histRebalance ? 
1 : 64; i < KEYS; i += step) { + keysWithTombstone.add(i); + + cache0.remove(i); + } + + final LongMetric tombstoneMetric0 = ignite0.context().metric().registry( + MetricUtils.cacheGroupMetricsRegistryName(DEFAULT_CACHE_NAME)).findMetric("Tombstones"); + + final LongMetric tombstoneMetric1 = ignite1.context().metric().registry( + MetricUtils.cacheGroupMetricsRegistryName(DEFAULT_CACHE_NAME)).findMetric("Tombstones"); + + // On first node there should not be tombstones. + assertEquals(0, tombstoneMetric0.value()); + + if (expTombstone) + assertEquals(keysWithTombstone.size(), tombstoneMetric1.value()); + else + assertEquals(0, tombstoneMetric1.value()); + + // Update some of removed keys, this should remove tombstones. + for (int i = 0; i < KEYS; i += 128) { + keysWithTombstone.remove(i); + + cache0.put(i, i); + } + + assertTrue("Keys with tombstones should exist", !keysWithTombstone.isEmpty()); + + assertEquals(0, tombstoneMetric0.value()); + + if (expTombstone) + assertEquals(keysWithTombstone.size(), tombstoneMetric1.value()); + else + assertEquals(0, tombstoneMetric1.value()); + + TestRecordingCommunicationSpi.spi(ignite0).stopBlock(); + + awaitPartitionMapExchange(); + + IgniteCache cache1 = ignite(1).cache(DEFAULT_CACHE_NAME); + + for (int i = 0; i < KEYS; i++) { + if (keysWithTombstone.contains(i)) + assertNull(cache1.get(i)); + else + assertEquals((Object)i, cache1.get(i)); + } + + // Tombstones should be removed once rebalance is completed. + GridTestUtils.waitForCondition(() -> tombstoneMetric1.value() == 0, 30_000); + + assertEquals(0, tombstoneMetric1.value()); + } + + /** + * + */ + private static void blockRebalance(IgniteEx node) { + final int grpId = groupIdForCache(node, DEFAULT_CACHE_NAME); + + TestRecordingCommunicationSpi.spi(node).blockMessages((node0, msg) -> + (msg instanceof GridDhtPartitionSupplyMessage) + && ((GridCacheGroupIdMessage)msg).groupId() == grpId + ); + } + + /** + * @param atomicityMode Cache atomicity mode. 
+ * @return Cache configuration. + */ + private CacheConfiguration cacheConfiguration(CacheAtomicityMode atomicityMode) { + return new CacheConfiguration<>(DEFAULT_CACHE_NAME) + .setAtomicityMode(atomicityMode) + .setCacheMode(PARTITIONED) + .setBackups(2) + .setRebalanceMode(ASYNC) + .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC) + .setAffinity(new RendezvousAffinityFunction(false, 64)); + } +} diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/CacheRemoveWithTombstonesFailoverTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/CacheRemoveWithTombstonesFailoverTest.java new file mode 100644 index 0000000000000..db60ffc8a062d --- /dev/null +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/CacheRemoveWithTombstonesFailoverTest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.processors.cache.distributed.dht.topology; + +import java.util.HashSet; +import java.util.Set; +import org.apache.ignite.IgniteSystemProperties; +import org.apache.ignite.cache.CacheWriteSynchronizationMode; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.configuration.WALMode; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.TestRecordingCommunicationSpi; +import org.apache.ignite.internal.processors.cache.GridCacheGroupIdMessage; +import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage; +import org.apache.ignite.internal.processors.metric.impl.MetricUtils; +import org.apache.ignite.spi.metric.LongMetric; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.junits.WithSystemProperty; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; +import static org.apache.ignite.cache.CacheMode.PARTITIONED; +import static org.apache.ignite.cache.CacheRebalanceMode.ASYNC; + +/** + * Tests to check failover scenarios over tombstones. 
+ */ +public class CacheRemoveWithTombstonesFailoverTest extends PartitionsEvictManagerAbstractTest { + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(gridName); + + TestRecordingCommunicationSpi commSpi = new TestRecordingCommunicationSpi(); + + cfg.setConsistentId(gridName); + + cfg.setCommunicationSpi(commSpi); + + DataStorageConfiguration dsCfg = new DataStorageConfiguration() + .setDefaultDataRegionConfiguration( + new DataRegionConfiguration() + .setInitialSize(256L * 1024 * 1024) + .setMaxSize(256L * 1024 * 1024) + .setPersistenceEnabled(true) + ) + .setCheckpointFrequency(1024 * 1024 * 1024) + .setWalMode(WALMode.LOG_ONLY); + + cfg.setDataStorageConfiguration(dsCfg); + + cfg.setCacheConfiguration(cacheConfiguration()); + + return cfg; + } + + /** + * Tests that tombstones residing in a persistent partition are cleared after node restart. + */ + @Test + @WithSystemProperty(key = IgniteSystemProperties.IGNITE_BASELINE_AUTO_ADJUST_ENABLED, value = "false") + public void testTombstonesClearedAfterRestart() throws Exception { + IgniteEx crd = startGrid(0); + + crd.cluster().active(true); + + final int KEYS = 1024; + + for (int k = 0; k < KEYS; k++) + crd.cache(DEFAULT_CACHE_NAME).put(k, k); + + blockRebalance(crd); + + IgniteEx node = startGrid(1); + + // Do not run clear tombstones task. + instrumentEvictionQueue(node, task -> { + if (task instanceof PartitionsEvictManager.ClearTombstonesTask) + return null; + + return task; + }); + + resetBaselineTopology(); + + TestRecordingCommunicationSpi.spi(crd).waitForBlocked(); + + Set keysWithTombstone = new HashSet<>(); + + // Do removes while rebalance is in progress. 
+ for (int i = 0; i < KEYS; i += 2) { + keysWithTombstone.add(i); + + crd.cache(DEFAULT_CACHE_NAME).remove(i); + } + + final LongMetric tombstoneMetric = node.context().metric().registry( + MetricUtils.cacheGroupMetricsRegistryName(DEFAULT_CACHE_NAME)).findMetric("Tombstones"); + + Assert.assertEquals(keysWithTombstone.size(), tombstoneMetric.value()); + + // Resume rebalance. + TestRecordingCommunicationSpi.spi(crd).stopBlock(); + + // Partitions should be in OWNING state. + awaitPartitionMapExchange(); + + // But tombstones removal should be skipped. + Assert.assertEquals(keysWithTombstone.size(), tombstoneMetric.value()); + + // Stop node with tombstones. + stopGrid(1); + + // Stop coordinator. + stopGrid(0); + + // Startup node with tombstones in inactive state. + node = startGrid(1); + + final int grpId = groupIdForCache(node, DEFAULT_CACHE_NAME); + + // Tombstone metrics are unavailable before join to topology, using internal api. + long tombstonesBeforeActivation = node.context().cache().cacheGroup(grpId).topology().localPartitions() + .stream().map(part -> part.dataStore().tombstonesCount()).reduce(Long::sum).orElse(0L); + + Assert.assertEquals(keysWithTombstone.size(), tombstonesBeforeActivation); + + crd = startGrid(0); + + crd.cluster().active(true); + + awaitPartitionMapExchange(); + + final LongMetric tombstoneMetric1 = node.context().metric().registry( + MetricUtils.cacheGroupMetricsRegistryName(DEFAULT_CACHE_NAME)).findMetric("Tombstones"); + + // Tombstones should be removed after join to topology. 
+ GridTestUtils.waitForCondition(() -> tombstoneMetric1.value() == 0, 30_000); + + assertEquals(0, tombstoneMetric1.value()); + } + + /** + * + */ + private static void blockRebalance(IgniteEx node) { + final int grpId = groupIdForCache(node, DEFAULT_CACHE_NAME); + + TestRecordingCommunicationSpi.spi(node).blockMessages((node0, msg) -> + (msg instanceof GridDhtPartitionSupplyMessage) + && ((GridCacheGroupIdMessage)msg).groupId() == grpId + ); + } + + /** + * @return Cache configuration. + */ + private CacheConfiguration cacheConfiguration() { + return new CacheConfiguration<>(DEFAULT_CACHE_NAME) + .setAtomicityMode(TRANSACTIONAL) + .setCacheMode(PARTITIONED) + .setBackups(1) + .setRebalanceMode(ASYNC) + .setReadFromBackup(true) + .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC) + .setAffinity(new RendezvousAffinityFunction(false, 64)); + } +} diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/DropCacheContextDuringEvictionTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/DropCacheContextDuringEvictionTest.java index 4a7de041fd0ea..72a28532b4927 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/DropCacheContextDuringEvictionTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/DropCacheContextDuringEvictionTest.java @@ -39,11 +39,15 @@ public class DropCacheContextDuringEvictionTest extends PartitionsEvictManagerAb public void testDeactivation() throws Exception { T2 nodeAndEvictLatch = makeNodeWithEvictLatch(); - IgniteCache cache = nodeAndEvictLatch.get1().createCache(new CacheConfiguration<>(DEFAULT_CACHE_NAME) + nodeAndEvictLatch.get1().createCache(new CacheConfiguration<>(DEFAULT_CACHE_NAME) .setGroupName("test-grp")); - for (int i = 0; i < 100_000; i++) - cache.put(i, i); + try (IgniteDataStreamer streamer = 
nodeAndEvictLatch.get1().dataStreamer(DEFAULT_CACHE_NAME)) { + streamer.allowOverwrite(true); + + for (int k = 0; k < 100_000; k++) + streamer.addData(k, k); + } doActionDuringEviction(nodeAndEvictLatch, () -> nodeAndEvictLatch.get1().cluster().active(false)); @@ -60,17 +64,19 @@ public void testDestroyCacheGroup() throws Exception { List caches = new ArrayList<>(); for (int idx = 0; idx < 10; idx++) { - IgniteCache cache = nodeAndEvictLatch.get1().createCache(new CacheConfiguration<>(DEFAULT_CACHE_NAME + idx) - .setGroupName("test-grp")); + String cacheName = DEFAULT_CACHE_NAME + idx; - caches.add(cache.getName()); + nodeAndEvictLatch.get1().createCache(new CacheConfiguration<>(cacheName) + .setGroupName("test-grp")); - try (IgniteDataStreamer streamer = nodeAndEvictLatch.get1().dataStreamer(cache.getName())) { + try (IgniteDataStreamer streamer = nodeAndEvictLatch.get1().dataStreamer(cacheName)) { streamer.allowOverwrite(true); - for (int i = 0; i < 200_000; i++) - streamer.addData(i, i); + for (int k = 0; k < 100_000; k++) + streamer.addData(k, k); } + + caches.add(cacheName); } doActionDuringEviction(nodeAndEvictLatch, () -> nodeAndEvictLatch.get1().destroyCaches(caches)); diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManagerAbstractTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManagerAbstractTest.java index e49e07fe6dc15..233b25ec5db80 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManagerAbstractTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManagerAbstractTest.java @@ -17,26 +17,22 @@ package org.apache.ignite.internal.processors.cache.distributed.dht.topology; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; import java.util.Queue; 
import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; -import javax.annotation.Nullable; import org.apache.ignite.Ignite; -import org.apache.ignite.configuration.DataRegionConfiguration; -import org.apache.ignite.configuration.DataStorageConfiguration; import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.failure.AbstractFailureHandler; import org.apache.ignite.failure.FailureContext; import org.apache.ignite.internal.IgniteEx; import org.apache.ignite.internal.IgniteInterruptedCheckedException; -import org.apache.ignite.internal.util.future.GridFutureAdapter; import org.apache.ignite.internal.util.typedef.T2; import org.apache.ignite.internal.util.typedef.internal.U; +import org.apache.ignite.lang.IgniteClosure; import org.apache.ignite.testframework.GridTestUtils; import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.jetbrains.annotations.NotNull; /** * @@ -49,8 +45,7 @@ public abstract class PartitionsEvictManagerAbstractTest extends GridCommonAbstr @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); - cfg.setDataStorageConfiguration(new DataStorageConfiguration() - .setDefaultDataRegionConfiguration(new DataRegionConfiguration().setPersistenceEnabled(true))); + cfg.setActiveOnStart(false); cfg.setFailureHandler(new AbstractFailureHandler() { /** {@inheritDoc} */ @@ -67,12 +62,16 @@ public abstract class PartitionsEvictManagerAbstractTest extends GridCommonAbstr /** {@inheritDoc} */ @Override protected void beforeTest() throws Exception { + stopAllGrids(); + cleanPersistenceDir(); } /** {@inheritDoc} */ @Override protected void afterTest() throws Exception { stopAllGrids(); + + cleanPersistenceDir(); } /** @@ -92,40 +91,43 @@ protected void awaitEvictionQueueIsEmpty(IgniteEx node, int ms) 
throws IgniteInt protected void awaitEvictionQueueForFilling(IgniteEx node, int ms) throws IgniteInterruptedCheckedException { PartitionsEvictManager.BucketQueue evictionQueue = node.context().cache().context().evict().evictionQueue; - assertTrue(GridTestUtils.waitForCondition(() -> !evictionQueue.isEmpty(), ms)); + assertTrue(GridTestUtils.waitForCondition(() -> { + for (Queue queue : evictionQueue.buckets) + return ((InstrumentedEvictionQueue) queue).itemOffered; + + return false; + }, ms)); } /** * @param node Node. - * @param latch Latch. - * @param completeWithError Inner future throws exception. + * @param interceptor Interceptor that will be invoked after task from eviction has polled. */ - protected void subscribeEvictionQueueAtLatch(IgniteEx node, CountDownLatch latch, boolean completeWithError) { + protected void instrumentEvictionQueue( + IgniteEx node, + IgniteClosure interceptor + ) { PartitionsEvictManager.BucketQueue evictionQueue = node.context().cache().context().evict().evictionQueue; Queue[] buckets = evictionQueue.buckets; for (int i = 0; i < buckets.length; i++) - buckets[i] = new WaitingQueue(latch, completeWithError); + buckets[i] = new InstrumentedEvictionQueue(interceptor); } /** * */ protected T2 makeNodeWithEvictLatch() throws Exception { - return makeNodeWithEvictLatch(false); - } - - /** - * - */ - protected T2 makeNodeWithEvictLatch(boolean completeWithError) throws Exception { IgniteEx node1 = startGrid(0); - node1.cluster().baselineAutoAdjustEnabled(false); - CountDownLatch latch = new CountDownLatch(1); - subscribeEvictionQueueAtLatch(node1, latch, completeWithError); + instrumentEvictionQueue(node1, task -> { + U.awaitQuiet(latch); + + return task; + }); node1.cluster().active(true); @@ -137,11 +139,7 @@ protected T2 makeNodeWithEvictLatch(boolean completeWi * @param r R. 
*/ protected void doActionDuringEviction(T2 nodeAndEvictLatch, Runnable r) throws Exception { - IgniteEx node2 = startGrid(1); - - awaitPartitionMapExchange(); - - nodeAndEvictLatch.get1().cluster().setBaselineTopology(node2.cluster().topologyVersion()); + startGrid(1); awaitEvictionQueueForFilling(nodeAndEvictLatch.get1(), 100_000); @@ -153,55 +151,38 @@ protected void doActionDuringEviction(T2 nodeAndEvictL } /** - * Queue witch waits on the poll or breaks a PartitionEvictionTask. + * Queue that executes an interceptor during eviction task poll. */ - private class WaitingQueue extends LinkedBlockingQueue { - /** Latch. */ - private final CountDownLatch latch; + private static class InstrumentedEvictionQueue extends LinkedBlockingQueue { + /** Interceptor. */ + private final IgniteClosure interceptor; - /** Complete with error. */ - private final boolean completeWithError; + /** Empty indicator. */ + private volatile boolean itemOffered; /** - * @param latch Latch. - * @param completeWithError flag. + * @param interceptor Interceptor. */ - public WaitingQueue(CountDownLatch latch, boolean completeWithError) { - this.latch = latch; - this.completeWithError = completeWithError; + private InstrumentedEvictionQueue(IgniteClosure interceptor + ) { + this.interceptor = interceptor; } /** {@inheritDoc} */ - @Override public Object poll() { - U.awaitQuiet(latch); + @Override public boolean offer(@NotNull Object o) { + itemOffered = true; + return super.offer(o); + } + + /** {@inheritDoc} */ + @Override public Object poll() { Object obj = super.poll(); - // This code uses for failure handler testing into PartitionEvictionTask. 
- if(obj != null && completeWithError) { - try { - Field field = U.findField(PartitionsEvictManager.PartitionEvictionTask.class, "finishFut"); - - field.setAccessible(true); - - Field modifiersField = Field.class.getDeclaredField("modifiers"); - modifiersField.setAccessible(true); - modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); - - field.set(obj, new GridFutureAdapter() { - @Override - protected boolean onDone(@Nullable Object res, @Nullable Throwable err, boolean cancel) { - if (err == null) - throw new RuntimeException("TEST"); - - return super.onDone(res, err, cancel); - } - }); - } - catch (Exception e) { - fail(); - } - } + if (obj instanceof PartitionsEvictManager.AbstractEvictionTask) + return interceptor.apply((PartitionsEvictManager.AbstractEvictionTask) obj); return obj; } diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictionTaskFailureHandlerTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictionTaskFailureHandlerTest.java index 58c24608c29c5..e78c61b3d5075 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictionTaskFailureHandlerTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictionTaskFailureHandlerTest.java @@ -17,11 +17,16 @@ package org.apache.ignite.internal.processors.cache.distributed.dht.topology; -import java.util.concurrent.CountDownLatch; -import org.apache.ignite.IgniteCache; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.concurrent.atomic.AtomicBoolean; +import javax.annotation.Nullable; +import org.apache.ignite.IgniteDataStreamer; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; import 
org.apache.ignite.internal.IgniteEx; -import org.apache.ignite.internal.util.typedef.T2; +import org.apache.ignite.internal.util.future.GridFutureAdapter; +import org.apache.ignite.internal.util.typedef.internal.U; import org.apache.ignite.testframework.GridTestUtils; import org.junit.Test; @@ -29,20 +34,69 @@ * */ public class PartitionsEvictionTaskFailureHandlerTest extends PartitionsEvictManagerAbstractTest { + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); + + cfg.setCacheConfiguration(new CacheConfiguration<>(DEFAULT_CACHE_NAME)); + + return cfg; + } + /** * */ @Test public void testEvictionTaskShouldCallFailureHandler() throws Exception { - T2 nodeAndEvictLatch = makeNodeWithEvictLatch(true); + IgniteEx node = startGrid(0); + + AtomicBoolean once = new AtomicBoolean(); + + // Partition eviction task should throw exception after completion. + instrumentEvictionQueue(node, task -> { + if (!(task instanceof PartitionsEvictManager.PartitionEvictionTask)) + return task; + + // Fail once. 
+ if (!once.compareAndSet(false, true)) + return task; + + try { + Field field = U.findField(PartitionsEvictManager.PartitionEvictionTask.class, "finishFut"); + + field.setAccessible(true); + + Field modifiersField = Field.class.getDeclaredField("modifiers"); + modifiersField.setAccessible(true); + modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); + + field.set(task, new GridFutureAdapter() { + @Override protected boolean onDone(@Nullable Object res, @Nullable Throwable err, boolean cancel) { + if (err == null) + throw new RuntimeException("TEST"); + + return super.onDone(res, err, cancel); + } + }); + } + catch (Exception e) { + fail(); + } + + return task; + }); + + node.cluster().active(true); - IgniteCache cache = nodeAndEvictLatch.get1().createCache(new CacheConfiguration<>(DEFAULT_CACHE_NAME) - .setGroupName("test-grp")); + try (IgniteDataStreamer streamer = node.dataStreamer(DEFAULT_CACHE_NAME)) { + streamer.allowOverwrite(true); - for (int i = 0; i < 100_000; i++) - cache.put(i, i); + for (int k = 0; k < 1024; k++) + node.cache(DEFAULT_CACHE_NAME).put(k, k); + } - doActionDuringEviction(nodeAndEvictLatch, () -> {}); + // Some partitions from node 0 should be evicted. 
+ startGrid(1); assertTrue(GridTestUtils.waitForCondition(() -> failure.get(), 10_000)); } diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/IgnitePdsWithTtlTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/IgnitePdsWithTtlTest.java index afe2b34050858..4461d947a46f9 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/IgnitePdsWithTtlTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/IgnitePdsWithTtlTest.java @@ -316,7 +316,7 @@ protected void waitAndCheckExpired( IgniteCacheOffheapManager.CacheDataStore dataStore = ctx.cache().cacheGroup(CU.cacheId(GROUP_NAME)).offheap().dataStore(locPart); - GridCursor cur = dataStore.cursor(); + GridCursor cur = dataStore.cursor(false); assertFalse(cur.next()); assertEquals(0, locPart.fullSize()); diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/database/CacheFreeListSelfTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/database/CacheFreeListSelfTest.java index e4a8c9b1b6fd7..3ee9e95ce16ba 100644 --- a/modules/core/src/test/java/org/apache/ignite/internal/processors/database/CacheFreeListSelfTest.java +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/database/CacheFreeListSelfTest.java @@ -623,6 +623,11 @@ private TestDataRow(int keySize, int valSize) { return 0; } + /** {@inheritDoc} */ + @Override public void cacheId(int cacheId) { + throw new UnsupportedOperationException(); + } + /** {@inheritDoc} */ @Override public long newMvccCoordinatorVersion() { return 0; diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheMvccTestSuite9.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheMvccTestSuite9.java index 243fb4b344838..ff1e1ec6281eb 100644 --- 
a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheMvccTestSuite9.java +++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheMvccTestSuite9.java @@ -25,9 +25,12 @@ import org.apache.ignite.internal.processors.cache.IgniteCacheGetCustomCollectionsSelfTest; import org.apache.ignite.internal.processors.cache.IgniteCacheLoadRebalanceEvictionSelfTest; import org.apache.ignite.internal.processors.cache.distributed.CacheAtomicPrimarySyncBackPressureTest; +import org.apache.ignite.internal.processors.cache.distributed.CacheRemoveWithTombstonesLoadTest; +import org.apache.ignite.internal.processors.cache.distributed.CacheRemoveWithTombstonesTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteCachePrimarySyncTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteTxCachePrimarySyncTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteTxConcurrentRemoveObjectsTest; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.CacheRemoveWithTombstonesFailoverTest; import org.apache.ignite.internal.processors.cache.transactions.TxPartitionCounterStateConsistencyHistoryRebalanceTest; import org.apache.ignite.internal.metric.IoStatisticsCachePersistenceSelfTest; import org.apache.ignite.internal.metric.IoStatisticsCacheSelfTest; @@ -88,6 +91,11 @@ public static List> suite() { ignoredTests.add(IoStatisticsCacheSelfTest.class); ignoredTests.add(IoStatisticsCachePersistenceSelfTest.class); + // Tombstones are not created with mvcc. 
+ ignoredTests.add(CacheRemoveWithTombstonesTest.class); + ignoredTests.add(CacheRemoveWithTombstonesLoadTest.class); + ignoredTests.add(CacheRemoveWithTombstonesFailoverTest.class); + return new ArrayList<>(IgniteCacheTestSuite9.suite(ignoredTests)); } } diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheTestSuite9.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheTestSuite9.java index 3b23b916c8cbe..6d18a7239428d 100644 --- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheTestSuite9.java +++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheTestSuite9.java @@ -36,11 +36,14 @@ import org.apache.ignite.internal.processors.cache.IgniteCacheLoadRebalanceEvictionSelfTest; import org.apache.ignite.internal.processors.cache.distributed.CacheAtomicPrimarySyncBackPressureTest; import org.apache.ignite.internal.processors.cache.distributed.CacheOperationsInterruptTest; +import org.apache.ignite.internal.processors.cache.distributed.CacheRemoveWithTombstonesLoadTest; +import org.apache.ignite.internal.processors.cache.distributed.CacheRemoveWithTombstonesTest; import org.apache.ignite.internal.processors.cache.distributed.FailBackupOnAtomicOperationTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteCachePrimarySyncTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteTxCachePrimarySyncTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteTxCacheWriteSynchronizationModesMultithreadedTest; import org.apache.ignite.internal.processors.cache.distributed.IgniteTxConcurrentRemoveObjectsTest; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.CacheRemoveWithTombstonesFailoverTest; import org.apache.ignite.internal.processors.cache.transactions.PartitionUpdateCounterTest; import org.apache.ignite.internal.processors.cache.transactions.TxDataConsistencyOnCommitFailureTest; import 
org.apache.ignite.internal.processors.cache.transactions.TxPartitionCounterStateConsistencyHistoryRebalanceTest; @@ -122,6 +125,10 @@ public static List> suite(Collection ignoredTests) { GridTestUtils.addTestIfNeeded(suite, FailBackupOnAtomicOperationTest.class, ignoredTests); + GridTestUtils.addTestIfNeeded(suite, CacheRemoveWithTombstonesTest.class, ignoredTests); + GridTestUtils.addTestIfNeeded(suite, CacheRemoveWithTombstonesLoadTest.class, ignoredTests); + GridTestUtils.addTestIfNeeded(suite, CacheRemoveWithTombstonesFailoverTest.class, ignoredTests); + return suite; } } diff --git a/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/database/H2PkHashIndex.java b/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/database/H2PkHashIndex.java index f9ab6a4d3936b..db85a7b74deab 100644 --- a/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/database/H2PkHashIndex.java +++ b/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/database/H2PkHashIndex.java @@ -129,7 +129,7 @@ public H2PkHashIndex( continue; if (filter == null || filter.applyPartition(part)) - cursors.add(store.cursor(cctx.cacheId(), lowerObj, upperObj, null, mvccSnapshot)); + cursors.add(store.cursor(cctx.cacheId(), lowerObj, upperObj, null, mvccSnapshot, false)); } return new H2PkHashIndexCursor(cursors.iterator()); @@ -209,7 +209,7 @@ public H2PkHashIndex( int part = store.partId(); if (partsFilter == null || partsFilter.applyPartition(part)) - cursors.add(store.cursor(cctx.cacheId())); + cursors.add(store.cursor(cctx.cacheId(), false)); } Cursor pkHashCursor = new H2PkHashIndexCursor(cursors.iterator()); diff --git a/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/opt/H2CacheRow.java b/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/opt/H2CacheRow.java index 4bd584ff96f92..4ac6303ad5acf 100644 --- 
a/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/opt/H2CacheRow.java +++ b/modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/opt/H2CacheRow.java @@ -230,6 +230,11 @@ private boolean removedRow() { return row.cacheId(); } + /** {@inheritDoc} */ + @Override public void cacheId(int cacheId) { + row.cacheId(cacheId); + } + /** {@inheritDoc} */ @Override public long mvccCoordinatorVersion() { return row.mvccCoordinatorVersion(); diff --git a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheLockPartitionOnAffinityRunTest.java b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheLockPartitionOnAffinityRunTest.java index 23fe4bae6dc6b..c6e0575843239 100644 --- a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheLockPartitionOnAffinityRunTest.java +++ b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/cache/IgniteCacheLockPartitionOnAffinityRunTest.java @@ -85,7 +85,7 @@ private static int getOrganizationCountFromPartitionMap(final IgniteEx ignite, int cnt = 0; - GridCursor c = pOrgs.dataStore().cursor(); + GridCursor c = pOrgs.dataStore().cursor(false); CacheObjectContext ctx = cacheAdapterOrg.context().cacheObjectContext(); @@ -120,7 +120,7 @@ private static int getPersonsCountFromPartitionMap(final IgniteEx ignite, int or int cnt = 0; - GridCursor c = pPers.dataStore().cursor(); + GridCursor c = pPers.dataStore().cursor(false); CacheObjectContext ctx = cacheAdapterPers.context().cacheObjectContext(); diff --git a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildSelfTest.java b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildSelfTest.java index b4bb9e9112150..71588d5cc465a 100644 --- a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildSelfTest.java +++ 
b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildSelfTest.java @@ -160,7 +160,7 @@ protected void checkDataState(IgniteEx srv, boolean afterRebuild) throws IgniteC assertNotNull(icache); for (IgniteCacheOffheapManager.CacheDataStore store : icache.context().offheap().cacheDataStores()) { - GridCursor cur = store.cursor(); + GridCursor cur = store.cursor(false); while (cur.next()) { CacheDataRow row = cur.get(); diff --git a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildWithMvccEnabledSelfTest.java b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildWithMvccEnabledSelfTest.java index 72118433badf6..528e5b5963322 100644 --- a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildWithMvccEnabledSelfTest.java +++ b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/query/h2/GridIndexRebuildWithMvccEnabledSelfTest.java @@ -98,7 +98,7 @@ private static void lockVersion(IgniteEx node) throws IgniteCheckedException { CacheObjectContext coCtx = icache.context().cacheObjectContext(); for (IgniteCacheOffheapManager.CacheDataStore store : icache.context().offheap().cacheDataStores()) { - GridCursor cur = store.cursor(); + GridCursor cur = store.cursor(false); while (cur.next()) { CacheDataRow row = cur.get(); diff --git a/modules/indexing/src/test/java/org/apache/ignite/util/GridCommandHandlerIndexingTest.java b/modules/indexing/src/test/java/org/apache/ignite/util/GridCommandHandlerIndexingTest.java index 822d51ad13838..2050a11e15f74 100644 --- a/modules/indexing/src/test/java/org/apache/ignite/util/GridCommandHandlerIndexingTest.java +++ b/modules/indexing/src/test/java/org/apache/ignite/util/GridCommandHandlerIndexingTest.java @@ -20,12 +20,32 @@ import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; +import java.io.Serializable; +import 
java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cache.QueryIndex; +import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.metric.IoStatisticsHolderNoOp; +import org.apache.ignite.internal.processors.cache.GridCacheContext; +import org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition; +import org.apache.ignite.internal.processors.cache.persistence.CacheDataRow; +import org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager; import org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager; +import org.apache.ignite.internal.processors.cache.tree.SearchRow; +import org.apache.ignite.internal.processors.query.GridQueryProcessor; +import org.apache.ignite.internal.util.lang.GridIterator; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.internal.util.typedef.internal.U; import org.junit.Test; import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK; @@ -157,4 +177,154 @@ private void corruptIndexPartition(File path) throws IOException { idx.write(trash); } } + + /** + * Removes some entries from a partition skipping index update. This effectively breaks the index. 
+ */ + private void breakCacheDataTree(Ignite ig, String cacheName, int partId) { + IgniteEx ig0 = (IgniteEx)ig; + int cacheId = CU.cacheId(cacheName); + + ScanQuery scanQry = new ScanQuery(partId); + + GridCacheContext ctx = ig0.context().cache().context().cacheContext(cacheId); + + // Get current update counter + String grpName = ig0.context().cache().context().cacheContext(cacheId).config().getGroupName(); + int cacheGrpId = grpName == null ? cacheName.hashCode() : grpName.hashCode(); + + GridDhtLocalPartition locPart = ctx.dht().topology().localPartition(partId); + IgniteCacheOffheapManager.CacheDataStore dataStore = ig0.context().cache().context().cache().cacheGroup(cacheGrpId).offheap().dataStore(locPart); + + Iterator it = ig.cache(cacheName).withKeepBinary().query(scanQry).iterator(); + + for (int i = 0; i < 5_000; i++) { + if (it.hasNext()) { + Cache.Entry entry = it.next(); + + if (i % 5 == 0) { + // Do update + GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager)ig0.context().cache().context().database(); + + db.checkpointReadLock(); + + try { + IgniteCacheOffheapManager.CacheDataStore innerStore = U.field(dataStore, "delegate"); + + // IgniteCacheOffheapManagerImpl.CacheDataRowStore + Object rowStore = U.field(innerStore, "rowStore"); + + // IgniteCacheOffheapManagerImpl.CacheDataTree + Object dataTree = U.field(innerStore, "dataTree"); + + CacheDataRow oldRow = U.invoke( + dataTree.getClass(), + dataTree, + "remove", + new SearchRow(cacheId, ctx.toCacheKeyObject(entry.getKey()))); + + if (oldRow != null) + U.invoke(rowStore.getClass(), rowStore, "removeRow", oldRow.link(), IoStatisticsHolderNoOp.INSTANCE); + } + catch (IgniteCheckedException e) { + System.out.println("Failed to remove key skipping indexes: " + entry); + + e.printStackTrace(); + } + finally { + db.checkpointReadUnlock(); + } + } + } + else { + System.out.println("Early exit for index corruption, keys processed: " + i); + + break; + } + } + } + + /** + * Removes some 
entries from H2 trees skipping partition updates. This effectively breaks the index. + */ + private void breakSqlIndex(Ignite ig, String cacheName) throws Exception { + GridQueryProcessor qry = ((IgniteEx)ig).context().query(); + + GridCacheContext ctx = ((IgniteEx)ig).cachex(cacheName).context(); + + GridDhtLocalPartition locPart = ctx.topology().localPartitions().get(0); + + GridIterator it = ctx.group().offheap().partitionIterator(locPart.id()); + + for (int i = 0; i < 500; i++) { + if (!it.hasNextX()) { + System.out.println("Early exit for index corruption, keys processed: " + i); + + break; + } + + CacheDataRow row = it.nextX(); + + ctx.shared().database().checkpointReadLock(); + + try { + qry.remove(ctx, row); + } + finally { + ctx.shared().database().checkpointReadUnlock(); + } + } + } + + /** + * @param idxName Index name. + * @param idxOrgId Index org id. + */ + private QueryEntity personEntity(boolean idxName, boolean idxOrgId) { + QueryEntity entity = new QueryEntity(); + + entity.setKeyType(Integer.class.getName()); + entity.setValueType(Person.class.getName()); + + entity.addQueryField("orgId", Integer.class.getName(), null); + entity.addQueryField("name", String.class.getName(), null); + + List idxs = new ArrayList<>(); + + if (idxName) { + QueryIndex idx = new QueryIndex("name"); + + idxs.add(idx); + } + + if (idxOrgId) { + QueryIndex idx = new QueryIndex("orgId"); + + idxs.add(idx); + } + + entity.setIndexes(idxs); + + return entity; + } + + /** + * + */ + private static class Person implements Serializable { + /** */ + int orgId; + + /** */ + String name; + + /** + * @param orgId Organization ID. + * @param name Name. + */ + public Person(int orgId, String name) { + this.orgId = orgId; + this.name = name; + } + } }