Skip to content

Commit

Permalink
IGNITE-13510 Added snapshot status command to control.sh and JMX. (#1…
Browse files Browse the repository at this point in the history
…0202)

(cherry picked from commit 2361ff3)
  • Loading branch information
NSAmelchev committed Aug 18, 2022
1 parent b000d59 commit 77be76c
Show file tree
Hide file tree
Showing 17 changed files with 625 additions and 23 deletions.
36 changes: 32 additions & 4 deletions docs/_docs/snapshots/snapshots.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ control.(sh|bat) --snapshot restore snapshot_02092020 --start --groups snapshot-
--

==== Using CLI to control restore operation
The `control.sh|bat` script provides the ability to start, stop, and get the status of the restore operation.
The `control.sh|bat` script provides the ability to start and stop the restore operation.

[source,shell]
----
Expand All @@ -234,13 +234,41 @@ control.(sh|bat) --snapshot restore snapshot_09062021 --src /tmp/ignite/snapshot
# Start restoring only "cache-group1" and "cache-group2" from the snapshot "snapshot_09062021" in the background.
control.(sh|bat) --snapshot restore snapshot_09062021 --start --groups cache-group1,cache-group2
# Get the status of the restore operation for "snapshot_09062021".
control.(sh|bat) --snapshot restore snapshot_09062021 --status
# Cancel the restore operation for "snapshot_09062021".
control.(sh|bat) --snapshot restore snapshot_09062021 --cancel
----

== Getting Snapshot Operation Status

The status of the current snapshot operation in the cluster can be obtained using the `control.sh|bat` script or the JMX interface:

[tabs]
--
tab:Unix[]
[source,shell]
----
# Get the status of the snapshot operation.
control.sh --snapshot status
----

tab:Windows[]
[source,shell]
----
# Get the status of the snapshot operation.
control.bat --snapshot status
----

tab:JMX[]
You can also get the current snapshot status via the `SnapshotMXBean` interface:
[source,java]
----
SnapshotMXBean mxBean = ...;
// The status of the current snapshot operation in the cluster.
String status = mxBean.status();
----
--

== Consistency Guarantees

All snapshots are fully consistent in terms of concurrent cluster-wide operations as well as ongoing changes with Ignite.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import static org.apache.ignite.internal.commandline.snapshot.SnapshotRestoreCommandOption.SYNC;
import static org.apache.ignite.internal.commandline.snapshot.SnapshotSubcommands.RESTORE;
import static org.apache.ignite.internal.visor.snapshot.VisorSnapshotRestoreTaskAction.START;
import static org.apache.ignite.internal.visor.snapshot.VisorSnapshotRestoreTaskAction.STATUS;

/**
* Sub-command to restore snapshot.
Expand All @@ -49,6 +50,9 @@ protected SnapshotRestoreCommand() {

/** {@inheritDoc} */
@Override public Object execute(GridClientConfiguration clientCfg, Logger log) throws Exception {
if (cmdArg instanceof VisorSnapshotRestoreTaskArg && ((VisorSnapshotRestoreTaskArg)cmdArg).jobAction() == STATUS)
log.warning("Command deprecated. Use '" + SNAPSHOT + ' ' + SnapshotSubcommands.STATUS + "' instead.");

Object res = super.execute(clientCfg, log);

log.info(String.valueOf(res));
Expand Down Expand Up @@ -122,7 +126,8 @@ else if (option == SYNC) {

usage(log, "Restore snapshot:", SNAPSHOT, startParams, RESTORE.toString(), SNAPSHOT_NAME_ARG, "--start",
optional(GROUPS.argName(), GROUPS.arg()), optional(SOURCE.argName(), SOURCE.arg()), optional(SYNC.argName()));
usage(log, "Snapshot restore operation status:", SNAPSHOT, params, RESTORE.toString(), SNAPSHOT_NAME_ARG, "--status");
usage(log, "Snapshot restore operation status (Command deprecated. Use '" + SNAPSHOT + ' '
+ SnapshotSubcommands.STATUS + "' instead.):", SNAPSHOT, params, RESTORE.toString(), SNAPSHOT_NAME_ARG, "--status");
usage(log, "Cancel snapshot restore operation:", SNAPSHOT, params, RESTORE.toString(), SNAPSHOT_NAME_ARG, "--cancel");
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.internal.commandline.snapshot;

import java.text.DateFormat;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.apache.ignite.internal.commandline.CommandArgIterator;
import org.apache.ignite.internal.commandline.systemview.SystemViewCommand;
import org.apache.ignite.internal.util.GridStringBuilder;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.internal.visor.snapshot.VisorSnapshotStatusTask;
import org.apache.ignite.internal.visor.snapshot.VisorSnapshotStatusTask.SnapshotStatus;

import static org.apache.ignite.internal.commandline.CommandList.SNAPSHOT;
import static org.apache.ignite.internal.commandline.snapshot.SnapshotSubcommands.STATUS;
import static org.apache.ignite.internal.visor.systemview.VisorSystemViewTask.SimpleType.NUMBER;
import static org.apache.ignite.internal.visor.systemview.VisorSystemViewTask.SimpleType.STRING;

/**
 * Snapshot sub-command that prints the status of the snapshot operation currently running in the cluster.
 */
public class SnapshotStatusCommand extends SnapshotSubcommand {
    /** Creates the sub-command named {@code status} that runs {@link VisorSnapshotStatusTask}. */
    protected SnapshotStatusCommand() {
        super("status", VisorSnapshotStatusTask.class);
    }

    /** {@inheritDoc} */
    @Override protected void printResult(Object res, Logger log) {
        // A null result is reported as "no operation in progress".
        if (res == null) {
            log.info("There is no create or restore snapshot operation in progress.");

            return;
        }

        SnapshotStatus opStatus = (SnapshotStatus)res;

        boolean isCreating = opStatus.operation() == VisorSnapshotStatusTask.SnapshotOperation.CREATE;

        // Header: operation kind, snapshot name, request ID, start time and elapsed time.
        GridStringBuilder hdr = new GridStringBuilder();

        hdr.a(isCreating ? "Create snapshot operation is in progress." : "Restore snapshot operation is in progress.").nl()
            .a("Snapshot name: ").a(opStatus.name()).nl()
            .a("Operation ID: ").a(opStatus.requestId()).nl()
            .a("Started at: ").a(DateFormat.getDateTimeInstance().format(new Date(opStatus.startTime()))).nl()
            .a("Duration: ").a(X.timeSpan2DHMSM(System.currentTimeMillis() - opStatus.startTime())).nl()
            .nl()
            .a("Estimated operation progress:").nl();

        log.info(hdr.toString());

        // Progress is shown in bytes for the create operation and in partitions otherwise.
        List<String> titles;

        if (isCreating)
            titles = F.asList("Node ID", "Processed, bytes", "Total, bytes", "Percent");
        else
            titles = F.asList("Node ID", "Processed, partitions", "Total, partitions", "Percent");

        // One table row per node, ordered by node ID.
        List<List<?>> rows = opStatus.progress().entrySet().stream()
            .sorted(Map.Entry.comparingByKey())
            .map(e -> progressRow(e.getKey(), e.getValue().get1(), e.getValue().get2(), isCreating))
            .collect(Collectors.toList());

        SystemViewCommand.printTable(titles, F.asList(STRING, NUMBER, NUMBER, NUMBER), rows, log);

        log.info(U.nl());
    }

    /**
     * Builds a single row of the progress table.
     *
     * @param nodeId Node ID.
     * @param processed Processed amount reported for the node.
     * @param total Total amount reported for the node.
     * @param isCreating {@code True} if the create snapshot operation is in progress.
     * @return Row values: node ID, processed, total and percent.
     */
    private static List<?> progressRow(UUID nodeId, long processed, long total, boolean isCreating) {
        // A non-positive total cannot be turned into a percentage.
        if (total <= 0)
            return F.asList(nodeId, "unknown", "unknown", "unknown");

        String percent = (int)(processed * 100 / total) + "%";

        return isCreating
            ? F.asList(nodeId, U.humanReadableByteCount(processed), U.humanReadableByteCount(total), percent)
            : F.asList(nodeId, processed, total, percent);
    }

    /** {@inheritDoc} */
    @Override public void parseArguments(CommandArgIterator argIter) {
        // Any further sub-argument is rejected.
        if (!argIter.hasNextSubArg())
            return;

        throw new IllegalArgumentException("Unexpected argument: " + argIter.peekNextArg() + '.');
    }

    /** {@inheritDoc} */
    @Override public void printUsage(Logger log) {
        usage(log, "Get the status of the current snapshot operation:", SNAPSHOT, STATUS.toString());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ public enum SnapshotSubcommands {
CHECK(new SnapshotCheckCommand()),

/** Sub-command to restore snapshot. */
RESTORE(new SnapshotRestoreCommand());
RESTORE(new SnapshotRestoreCommand()),

/** Sub-command to get the status of the current snapshot operation. */
STATUS(new SnapshotStatusCommand());

/** Sub-command. */
private final SnapshotSubcommand cmd;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.IgniteDataStreamer;
import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.AtomicConfiguration;
Expand Down Expand Up @@ -455,9 +455,10 @@ protected void createCacheAndPreload(

ignite.createCache(ccfg);

IgniteCache<Object, Object> cache = ignite.cache(cacheName);
for (int i = 0; i < countEntries; i++)
cache.put(i, i);
try (IgniteDataStreamer<Object, Object> streamer = ignite.dataStreamer(cacheName)) {
for (int i = 0; i < countEntries; i++)
streamer.addData(i, i);
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
import org.apache.ignite.internal.processors.cluster.ChangeGlobalStateFinishMessage;
import org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor;
import org.apache.ignite.internal.util.BasicRateLimiter;
import org.apache.ignite.internal.util.distributed.SingleNodeMessage;
import org.apache.ignite.internal.util.future.IgniteFinishedFutureImpl;
import org.apache.ignite.internal.util.lang.GridAbsPredicate;
import org.apache.ignite.internal.util.lang.GridFunc;
Expand Down Expand Up @@ -3474,9 +3475,92 @@ public void testSnapshotRestoreCancelAndStatus() throws Exception {
assertNull(ig.cache(DEFAULT_CACHE_NAME));
}

/**
 * Verifies the output of the {@code --snapshot status} command before, during and after the create and
 * restore snapshot operations.
 *
 * @throws Exception If fails.
 */
@Test
public void testSnapshotStatus() throws Exception {
    String snpName = "snapshot1";

    IgniteEx ignite = startGrids(3);

    ignite.cluster().state(ACTIVE);

    createCacheAndPreload(ignite, 10_000);

    // Nothing is running yet.
    checkSnapshotStatus(false, false, null);

    TestRecordingCommunicationSpi commSpi = TestRecordingCommunicationSpi.spi(grid(1));

    // Block SingleNodeMessage on grid(1), then check the status once a message has been blocked.
    commSpi.blockMessages((node, msg) -> msg instanceof SingleNodeMessage);

    IgniteFuture<Void> opFut = ignite.snapshot().createSnapshot(snpName);

    commSpi.waitForBlocked();

    checkSnapshotStatus(true, false, snpName);

    commSpi.stopBlock();

    opFut.get(getTestTimeout());

    checkSnapshotStatus(false, false, null);

    // Same scenario for the restore operation.
    ignite.destroyCache(DEFAULT_CACHE_NAME);

    commSpi.blockMessages((node, msg) -> msg instanceof SingleNodeMessage);

    opFut = ignite.snapshot().restoreSnapshot(snpName, F.asList(DEFAULT_CACHE_NAME));

    commSpi.waitForBlocked();

    checkSnapshotStatus(false, true, snpName);

    commSpi.stopBlock();

    opFut.get(getTestTimeout());

    checkSnapshotStatus(false, false, null);
}

/**
 * Waits until every started node reports the expected snapshot state, then runs the
 * {@code --snapshot status} command and checks its output.
 *
 * @param isCreating {@code True} if create snapshot operation is in progress.
 * @param isRestoring {@code True} if restore snapshot operation is in progress.
 * @param expName Expected snapshot name.
 * @throws Exception If failed.
 */
private void checkSnapshotStatus(boolean isCreating, boolean isRestoring, String expName) throws Exception {
    boolean stateReached = waitForCondition(
        () -> G.allGrids().stream().allMatch(node -> {
            IgniteSnapshotManager snpMgr = ((IgniteEx)node).context().cache().context().snapshotMgr();

            return isCreating == snpMgr.isSnapshotCreating() && isRestoring == snpMgr.isRestoring();
        }),
        getTestTimeout());

    assertTrue(stateReached);

    // NOTE(review): if testOut accumulates output across invocations within one test, the assertions
    // below may match text printed by an earlier call - confirm that it is reset between calls.
    injectTestSystemOut();

    int exitCode = execute("--snapshot", "status");

    String out = testOut.toString();

    assertEquals(out, EXIT_CODE_OK, exitCode);

    // No operation is expected: only the "nothing in progress" message is checked.
    if (!(isCreating || isRestoring)) {
        assertContains(log, out, "There is no create or restore snapshot operation in progress.");

        return;
    }

    String expOpMsg = isCreating
        ? "Create snapshot operation is in progress."
        : "Restore snapshot operation is in progress.";

    assertContains(log, out, expOpMsg);
    assertContains(log, out, "Snapshot name: " + expName);

    // The ID of every started node must appear in the output.
    G.allGrids().forEach(node -> assertContains(log, out, node.cluster().localNode().id().toString()));
}

/** @throws Exception If failed. */
@Test
@WithSystemProperty(key = IGNITE_PDS_SKIP_CHECKPOINT_ON_NODE_STOP, value = "true")
public void testCleaningGarbageAfterCacheDestroyedAndNodeStop_ControlConsoleUtil() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,11 @@ public boolean isSnapshotCreating() {
}
}

/**
 * Exposes the value of {@code clusterSnpReq} as is.
 *
 * @return Current create snapshot request. {@code Null} if there is no create snapshot operation in progress.
 */
@Nullable public SnapshotOperationRequest currentCreateRequest() {
return clusterSnpReq;
}

/**
* Check if snapshot restore process is currently running.
*
Expand Down

0 comments on commit 77be76c

Please sign in to comment.