Skip to content

Table creation gets progressively slower when creating a lot of tables #4684

@dlmarion

Description

@dlmarion

A user reported this issue on the user mailing list. I wrote a small test (below) that shows table creation getting progressively slower as more tables are created.

package org.apache.accumulo.test;

import java.time.Duration;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.harness.SharedMiniClusterBase;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/**
 * Reproducer for progressively slower table creation: creates many tables one after another on a
 * shared mini cluster and prints how long each individual creation took.
 */
public class CreateTableIT extends SharedMiniClusterBase {

  /** Number of tables the test creates. */
  private static final int TABLE_COUNT = 1000;

  /**
   * Overall timeout for the test.
   *
   * @return a five minute timeout
   */
  @Override
  protected Duration defaultTimeout() {
    return Duration.ofMinutes(5);
  }

  /** Starts the shared mini cluster once before any test in this class runs. */
  @BeforeAll
  public static void setup() throws Exception {
    SharedMiniClusterBase.startMiniCluster();
  }

  /** Stops the shared mini cluster after all tests in this class have run. */
  @AfterAll
  public static void teardown() {
    SharedMiniClusterBase.stopMiniCluster();
  }

  /**
   * Creates {@link #TABLE_COUNT} tables sequentially and prints the elapsed time of each creation
   * to standard out, so a growing per-table cost is visible in the output.
   */
  @Test
  public void testCreateLotsOfTables() throws Exception {
    try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {

      String[] tableNames = getUniqueNames(TABLE_COUNT);

      for (String tableName : tableNames) {
        // Create waits for the Fate operation to complete.
        // System.nanoTime() is monotonic, so the measured interval is not affected by
        // wall-clock adjustments the way System.currentTimeMillis() would be.
        long start = System.nanoTime();
        client.tableOperations().create(tableName);
        long elapsedMs = Duration.ofNanos(System.nanoTime() - start).toMillis();
        System.out.println("Table creation took: " + elapsedMs + "ms");
      }
    }
  }

}

When I jstacked the Manager I noticed the following thread stack:

"Repo Runner-Worker-1" #100 daemon prio=5 os_prio=0 cpu=2402.44ms elapsed=78.24s tid=0x00007fa2b8002000 nid=0x59e9c runnable  [0x00007fa3acd7f000]
   java.lang.Thread.State: RUNNABLE
	at sun.nio.ch.IOUtil.write1(java.base@11.0.22/Native Method)
	at sun.nio.ch.EPollSelectorImpl.wakeup(java.base@11.0.22/EPollSelectorImpl.java:254)
	- locked <0x00000000f178c2f0> (a java.lang.Object)
	at org.apache.zookeeper.ClientCnxnSocketNIO.wakeupCnxn(ClientCnxnSocketNIO.java:324)
	- locked <0x00000000f139b620> (a org.apache.zookeeper.ClientCnxnSocketNIO)
	at org.apache.zookeeper.ClientCnxnSocketNIO.packetAdded(ClientCnxnSocketNIO.java:315)
	at org.apache.zookeeper.ClientCnxn.queuePacket(ClientCnxn.java:1680)
	at org.apache.zookeeper.ClientCnxn.submitRequest(ClientCnxn.java:1565)
	at org.apache.zookeeper.ClientCnxn.submitRequest(ClientCnxn.java:1555)
	at org.apache.zookeeper.ZooKeeper.getData(ZooKeeper.java:1970)
	at org.apache.accumulo.core.fate.zookeeper.ZooCache$2.run(ZooCache.java:387)
	at org.apache.accumulo.core.fate.zookeeper.ZooCache$2.run(ZooCache.java:1)
	at org.apache.accumulo.core.fate.zookeeper.ZooCache$ZooRunnable.retry(ZooCache.java:247)
	at org.apache.accumulo.core.fate.zookeeper.ZooCache.get(ZooCache.java:406)
	at org.apache.accumulo.core.fate.zookeeper.ZooCache.get(ZooCache.java:338)
	at org.apache.accumulo.core.util.tables.TableMap.<init>(TableMap.java:72)
	at org.apache.accumulo.core.util.tables.TableZooHelper.lambda$0(TableZooHelper.java:116)
	at org.apache.accumulo.core.util.tables.TableZooHelper$$Lambda$438/0x00000008403e0840.call(Unknown Source)
	at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4955)
	at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3589)
	at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2328)
	at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2187)
	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2081)
	at com.google.common.cache.LocalCache.get(LocalCache.java:4036)
	at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4950)
	at org.apache.accumulo.core.util.tables.TableZooHelper.getCachedTableMap(TableZooHelper.java:116)
	at org.apache.accumulo.core.util.tables.TableZooHelper.getTableMap(TableZooHelper.java:106)
	at org.apache.accumulo.core.util.tables.TableZooHelper._getTableIdDetectNamespaceNotFound(TableZooHelper.java:74)
	at org.apache.accumulo.core.util.tables.TableZooHelper.getTableId(TableZooHelper.java:63)
	at org.apache.accumulo.core.clientImpl.ClientContext.getTableId(ClientContext.java:634)
	at org.apache.accumulo.core.clientImpl.ClientContext.createBatchWriter(ClientContext.java:756)
	at org.apache.accumulo.core.clientImpl.ClientContext.createBatchWriter(ClientContext.java:762)
	at org.apache.accumulo.server.metadata.TabletsMutatorImpl.getWriter(TabletsMutatorImpl.java:58)
	at org.apache.accumulo.server.metadata.TabletsMutatorImpl.mutateTablet(TabletsMutatorImpl.java:73)
	at org.apache.accumulo.server.metadata.ServerAmpleImpl.mutateTablet(ServerAmpleImpl.java:88)
	at org.apache.accumulo.server.util.MetadataTableUtil.addTablet(MetadataTableUtil.java:177)
	at org.apache.accumulo.manager.tableOps.create.PopulateMetadata.call(PopulateMetadata.java:68)
	at org.apache.accumulo.manager.tableOps.ManagerRepo.call(ManagerRepo.java:1)
	at org.apache.accumulo.manager.tableOps.TraceRepo.call(TraceRepo.java:60)
	at org.apache.accumulo.core.fate.Fate$TransactionRunner.run(Fate.java:100)
	at org.apache.accumulo.core.trace.TraceWrappedRunnable.run(TraceWrappedRunnable.java:52)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.22/ThreadPoolExecutor.java:1128)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.22/ThreadPoolExecutor.java:628)
	at org.apache.accumulo.core.trace.TraceWrappedRunnable.run(TraceWrappedRunnable.java:52)
	at java.lang.Thread.run(java.base@11.0.22/Thread.java:829)

It looks like ServerAmpleImpl is trying to update the metadata table for the newly created table, but to resolve the table ID the code has to go back to ZooKeeper and rebuild the ZooCache-backed table ID map (TableMap), apparently because the prior table creation invalidated the cached copy.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions