Skip to content

Commit

Permalink
GG-19486 Fixed IgniteOutOfMemoryException when IgniteCache#clear() is…
Browse files Browse the repository at this point in the history
… called
  • Loading branch information
sk0x50 committed Jul 9, 2019
1 parent dcf0cfd commit d1b0787
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1705,7 +1705,6 @@ private CacheDataStore init0(boolean checkExists) throws IgniteCheckedException
freeListName,
grp.dataRegion().memoryMetrics(),
grp.dataRegion(),
null,
ctx.wal(),
reuseRoot.pageId().pageId(),
reuseRoot.isAllocated(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.failure.FailureContext;
import org.apache.ignite.failure.FailureType;
import org.apache.ignite.internal.GridKernalContext;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.managers.discovery.GridDiscoveryManager;
import org.apache.ignite.internal.mem.DirectMemoryProvider;
import org.apache.ignite.internal.mem.DirectMemoryRegion;
import org.apache.ignite.internal.mem.IgniteOutOfMemoryException;
import org.apache.ignite.internal.mem.file.MappedFileMemoryProvider;
import org.apache.ignite.internal.mem.unsafe.UnsafeMemoryProvider;
import org.apache.ignite.internal.pagemem.PageMemory;
Expand All @@ -57,6 +60,7 @@
import org.apache.ignite.internal.processors.cache.persistence.evict.Random2LruPageEvictionTracker;
import org.apache.ignite.internal.processors.cache.persistence.evict.RandomLruPageEvictionTracker;
import org.apache.ignite.internal.processors.cache.persistence.filename.PdsFolderSettings;
import org.apache.ignite.internal.processors.cache.persistence.freelist.AbstractFreeList;
import org.apache.ignite.internal.processors.cache.persistence.freelist.CacheFreeList;
import org.apache.ignite.internal.processors.cache.persistence.freelist.FreeList;
import org.apache.ignite.internal.processors.cache.persistence.metastorage.MetaStorage;
Expand Down Expand Up @@ -257,7 +261,6 @@ protected void initPageMemoryDataStructures(DataStorageConfiguration dbCfg) thro
freeListName,
memMetrics,
memPlc,
null,
persistenceEnabled ? cctx.wal() : null,
0L,
true,
Expand Down Expand Up @@ -982,6 +985,68 @@ public void releaseHistoryForPreloading() {
// No-op
}

/**
 * Verifies that the given {@code region} can still accept a new entry.
 *
 * The check is performed if and only if the region is not persistent
 * (see {@link DataRegionConfiguration#isPersistenceEnabled()}) and page eviction is turned off
 * (see {@link DataPageEvictionMode#DISABLED}); in every other case the method returns immediately.
 *
 * A non-persistent region has to keep a number of pages in reserve for its free list {@link AbstractFreeList}.
 * For instance, removing a row from the underlying store may require allocating a new data page
 * in order to move a tracked page from one bucket to another one that has no free space for a new stripe,
 * see {@link AbstractFreeList#removeDataRowByLink}.
 * For that reason inserts are rejected once the threshold computed below is exceeded.
 *
 * When the threshold is exceeded, the error is reported to the failure processor as
 * {@link FailureType#CRITICAL_ERROR} (if a kernal context is available) and then thrown.
 *
 * @param region Data region to be checked, {@code null} is ignored.
 * @throws IgniteOutOfMemoryException If the given data region does not have enough free space
 *      for putting a new entry.
 */
public void ensureFreeSpaceForInsert(DataRegion region) throws IgniteOutOfMemoryException {
    if (region == null)
        return;

    DataRegionConfiguration cfg = region.config();

    boolean evictionDisabled = cfg.getPageEvictionMode() == DataPageEvictionMode.DISABLED;

    // Nothing to reserve when pages can be evicted or the region is backed by persistence.
    if (!evictionDisabled || cfg.isPersistenceEnabled())
        return;

    long maxBytes = cfg.getMaxSize();

    PageMemory mem = region.pageMemory();

    CacheFreeList list = freeListMap.get(cfg.getName());

    long usedPages = mem.loadedPages() - list.emptyDataPages();

    // Upper bound of pages the region is able to allocate.
    long maxPages = maxBytes / mem.systemPageSize();

    // Pages currently in use plus the pages that may be needed to move all of them to a reuse bucket.
    // The latter equals usedPages * 8 / pageSize, where 8 is the size of a link.
    // A page cannot be filled with links entirely (see PagesListNodeIO and PagesListMetaIO#getCapacity()),
    // so the estimate is pessimistically multiplied by 1.5; either way the overhead is below 1 percent.
    double requiredPages = usedPages * (8.0 / mem.pageSize() + 1) * 1.5 + 256 /*one page per bucket*/;

    if (maxPages < requiredPages) {
        String nl = U.nl();

        StringBuilder msg = new StringBuilder("Out of memory in data region [")
            .append("name=").append(cfg.getName())
            .append(", initSize=").append(U.readableSize(cfg.getInitialSize(), false))
            .append(", maxSize=").append(U.readableSize(cfg.getMaxSize(), false))
            .append(", persistenceEnabled=").append(cfg.isPersistenceEnabled())
            .append("] Try the following:").append(nl)
            .append(" ^-- Increase maximum off-heap memory size (DataRegionConfiguration.maxSize)").append(nl)
            .append(" ^-- Enable Ignite persistence (DataRegionConfiguration.persistenceEnabled)").append(nl)
            .append(" ^-- Enable eviction or expiration policies");

        IgniteOutOfMemoryException err = new IgniteOutOfMemoryException(msg.toString());

        // Let the failure processor know before propagating the error to the caller.
        if (cctx.kernalContext() != null)
            cctx.kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, err));

        throw err;
    }
}

/**
* See {@link GridCacheMapEntry#ensureFreeSpace()}
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.ignite.internal.processors.cache.persistence;

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.metric.IoStatisticsHolder;
import org.apache.ignite.internal.pagemem.PageIdUtils;
import org.apache.ignite.internal.pagemem.PageMemory;
import org.apache.ignite.internal.processors.cache.CacheGroupContext;
Expand All @@ -25,7 +26,6 @@
import org.apache.ignite.internal.processors.cache.persistence.freelist.FreeList;
import org.apache.ignite.internal.processors.cache.persistence.tree.util.PageHandler;
import org.apache.ignite.internal.processors.query.GridQueryRowCacheCleaner;
import org.apache.ignite.internal.metric.IoStatisticsHolder;
import org.apache.ignite.internal.util.typedef.internal.U;

/**
Expand All @@ -50,6 +50,9 @@ public class RowStore {
/** Row cache cleaner. */
private GridQueryRowCacheCleaner rowCacheCleaner;

/** */
protected final CacheGroupContext grp;

/**
* @param grp Cache group.
* @param freeList Free list.
Expand All @@ -58,6 +61,7 @@ public RowStore(CacheGroupContext grp, FreeList freeList) {
assert grp != null;
assert freeList != null;

this.grp = grp;
this.freeList = freeList;

ctx = grp.shared();
Expand Down Expand Up @@ -96,8 +100,11 @@ public void removeRow(long link, IoStatisticsHolder statHolder) throws IgniteChe
* @throws IgniteCheckedException If failed.
*/
public void addRow(CacheDataRow row, IoStatisticsHolder statHolder) throws IgniteCheckedException {
if (!persistenceEnabled)
if (!persistenceEnabled) {
ctx.database().ensureFreeSpaceForInsert(grp.dataRegion());

freeList.insertDataRow(row, statHolder);
}
else {
ctx.database().checkpointReadLock();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@
package org.apache.ignite.internal.processors.cache.persistence.freelist;

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.metric.IoStatisticsHolder;
import org.apache.ignite.internal.pagemem.PageIdUtils;
import org.apache.ignite.internal.pagemem.wal.IgniteWriteAheadLogManager;
import org.apache.ignite.internal.processors.cache.persistence.CacheDataRow;
import org.apache.ignite.internal.processors.cache.persistence.DataRegion;
import org.apache.ignite.internal.processors.cache.persistence.DataRegionMetricsImpl;
import org.apache.ignite.internal.processors.cache.persistence.tree.reuse.ReuseList;
import org.apache.ignite.internal.processors.cache.persistence.tree.util.PageLockListener;
import org.apache.ignite.internal.metric.IoStatisticsHolder;
import org.apache.ignite.internal.util.typedef.internal.U;

/**
Expand All @@ -36,7 +35,6 @@ public class CacheFreeList extends AbstractFreeList<CacheDataRow> {
* @param name Name.
* @param regionMetrics Region metrics.
* @param dataRegion Data region.
* @param reuseList Reuse list.
* @param wal Wal.
* @param metaPageId Meta page id.
* @param initNew Initialize new.
Expand All @@ -46,7 +44,6 @@ public CacheFreeList(
String name,
DataRegionMetricsImpl regionMetrics,
DataRegion dataRegion,
ReuseList reuseList,
IgniteWriteAheadLogManager wal,
long metaPageId,
boolean initNew,
Expand All @@ -57,7 +54,7 @@ public CacheFreeList(
name,
regionMetrics,
dataRegion,
reuseList,
null,
wal,
metaPageId,
initNew,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ public static void setSkipVersion(boolean skipVer) {
/** */
private final int partId;

/** */
private final CacheGroupContext grp;

/**
* @param grp Cache group.
* @param freeList Free list.
Expand All @@ -61,7 +58,6 @@ public CacheDataRowStore(CacheGroupContext grp, FreeList freeList, int partId) {
super(grp, freeList);

this.partId = partId;
this.grp = grp;
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2019 GridGain Systems, Inc. and Contributors.
*
* Licensed under the GridGain Community Edition License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.gridgain.com/products/software/community-edition/gridgain-community-edition-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.internal.processors.cache;

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.cache.CacheAtomicityMode;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.failure.AbstractFailureHandler;
import org.apache.ignite.failure.FailureContext;
import org.apache.ignite.internal.mem.IgniteOutOfMemoryException;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;

import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC;
import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.configuration.DataPageEvictionMode.DISABLED;

/**
 * Tests behavior of IgniteCache when {@link IgniteOutOfMemoryException} is thrown.
 *
 * A single node is started with a small in-memory data region (no persistence, no page eviction).
 * Each test overfills a cache until the exception is raised and then checks that
 * {@link IgniteCache#clear()} succeeds without involving the failure handler.
 */
public class CacheIgniteOutOfMemoryExceptionTest extends GridCommonAbstractTest {
    /** Maximum size of the default data region. */
    private static final long DATA_REGION_SIZE = 20L * 1024 * 1024;

    /** Number of load & clear cycles performed by each test. */
    private static final int ATTEMPTS_NUM = 3;

    /** Number of keys the test tries to put; the region is expected to overflow before all of them are stored. */
    private static final int KEYS_CNT = 500_000;

    /** Set to {@code true} by the failure handler when a node failure is reported. */
    private static final AtomicBoolean failure = new AtomicBoolean(false);

    /** {@inheritDoc} */
    @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception {
        IgniteConfiguration cfg = super.getConfiguration(gridName);

        cfg.setDataStorageConfiguration(new DataStorageConfiguration()
            .setDefaultDataRegionConfiguration(
                new DataRegionConfiguration()
                    .setMaxSize(DATA_REGION_SIZE)
                    .setPageEvictionMode(DISABLED)
                    .setPersistenceEnabled(false)));

        cfg.setFailureHandler(new AbstractFailureHandler() {
            /** {@inheritDoc} */
            @Override protected boolean handle(Ignite ignite, FailureContext failureCtx) {
                failure.set(true);

                // Do not invalidate a node context.
                return false;
            }
        });

        // Caches are named after their atomicity mode so that tests can look them up by mode.
        cfg.setCacheConfiguration(
            new CacheConfiguration(ATOMIC.name()).setAtomicityMode(ATOMIC),
            new CacheConfiguration(TRANSACTIONAL.name()).setAtomicityMode(TRANSACTIONAL));

        return cfg;
    }

    /** {@inheritDoc} */
    @Override protected void beforeTestsStarted() throws Exception {
        startGrid(0);
    }

    /** {@inheritDoc} */
    @Override protected void afterTestsStopped() throws Exception {
        stopAllGrids();
    }

    /**
     * @throws Exception If failed.
     */
    @Test
    public void testLoadAndClearAtomicCache() throws Exception {
        loadAndClearCache(ATOMIC, ATTEMPTS_NUM);
    }

    /**
     * @throws Exception If failed.
     */
    @Test
    public void testLoadAndClearTransactionalCache() throws Exception {
        loadAndClearCache(TRANSACTIONAL, ATTEMPTS_NUM);
    }

    /**
     * Takes the pre-configured cache named after the given atomicity mode and tries to load & clear it in a loop.
     * It is assumed that {@link IgniteOutOfMemoryException} is thrown during loading the cache,
     * however {@link IgniteCache#clear()} should return the cache to the operable state.
     *
     * @param mode Cache atomicity mode.
     * @param attempts Number of attempts to load and clear the cache.
     */
    private void loadAndClearCache(CacheAtomicityMode mode, int attempts) {
        IgniteCache<Object, Object> cache = grid(0).cache(mode.name());

        for (int i = 0; i < attempts; ++i) {
            // The flag is static and shared by all tests: start every load phase from a known state,
            // otherwise a value left by a previously failed test could satisfy the assertion below.
            failure.set(false);

            try {
                for (int key = 0; key < KEYS_CNT; ++key)
                    cache.put(key, "abc");

                // Throws an Error, so it is not intercepted by the catch block below.
                fail("OutOfMemoryException hasn't been thrown");
            }
            catch (Exception e) {
                assertTrue(
                    "Exception has been thrown, but the exception type is unexpected [exc=" + e + ']',
                    X.hasCause(e, IgniteOutOfMemoryException.class));

                assertTrue("Failure handler should be called due to IOOM.", failure.get());
            }

            // Let's check that the cache can be cleared without any errors.
            failure.set(false);

            try {
                cache.clear();
            }
            catch (Exception e) {
                // Keep the original exception as the cause so that its stack trace is reported.
                throw new AssertionError("Clearing the cache should not trigger any exception [exc=" + e + ']', e);
            }

            assertFalse("Failure handler should not be called during clearing the cache.", failure.get());
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,6 @@ protected FreeList createFreeList(int pageSize) throws Exception {
regionMetrics,
dataRegion,
null,
null,
metaPageId,
true,
null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.ignite.internal.processors.cache.CacheIgniteOutOfMemoryExceptionTest;
import org.apache.ignite.internal.processors.cache.CacheNoAffinityExchangeTest;
import org.apache.ignite.internal.processors.cache.ClientFastReplyCoordinatorFailureTest;
import org.apache.ignite.internal.processors.cache.PartitionedAtomicCacheGetsDistributionTest;
Expand Down Expand Up @@ -110,6 +111,7 @@ public static List<Class<?>> suite(Collection<Class> ignoredTests) {

// TODO enable this test after IGNITE-6753, now it takes too long
// GridTestUtils.addTestIfNeeded(suite, IgniteOutOfMemoryPropagationTest.class, ignoredTests);
GridTestUtils.addTestIfNeeded(suite, CacheIgniteOutOfMemoryExceptionTest.class, ignoredTests);

GridTestUtils.addTestIfNeeded(suite, ReplicatedAtomicCacheGetsDistributionTest.class, ignoredTests);
GridTestUtils.addTestIfNeeded(suite, ReplicatedTransactionalOptimisticCacheGetsDistributionTest.class, ignoredTests);
Expand Down

0 comments on commit d1b0787

Please sign in to comment.