From 2ae95bd6e65f698120eb3a7b774b4a7b252ce168 Mon Sep 17 00:00:00 2001 From: Maja Kabiljo Date: Fri, 26 Aug 2016 13:51:37 -0700 Subject: [PATCH] GIRAPH-1108: Allow measuring time spent doing GC in some interval Summary: Sometimes when things are slow, we want to know whether it's because of GC or not. Keep track of last k GC pauses and a way to check how much time since some timestamp was spent doing GC. Test Plan: Ran a job which periodically prints stats from this and manually verified based on GC logs that it's measuring it correctly Differential Revision: https://reviews.facebook.net/D62727 --- .../apache/giraph/graph/GraphTaskManager.java | 1 + .../apache/giraph/metrics/GiraphMetrics.java | 15 +++ .../org/apache/giraph/utils/GcTracker.java | 97 +++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 giraph-core/src/main/java/org/apache/giraph/utils/GcTracker.java diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/GraphTaskManager.java b/giraph-core/src/main/java/org/apache/giraph/graph/GraphTaskManager.java index 339b5e94b..6a7028a15 100644 --- a/giraph-core/src/main/java/org/apache/giraph/graph/GraphTaskManager.java +++ b/giraph-core/src/main/java/org/apache/giraph/graph/GraphTaskManager.java @@ -665,6 +665,7 @@ public void handleNotification(Notification notification, info.getGcInfo().getDuration() + "ms"); } gcTimeMetric.inc(info.getGcInfo().getDuration()); + GiraphMetrics.get().getGcTracker().gcOccurred(info.getGcInfo()); if (oocEngine != null) { oocEngine.gcCompleted(info); } diff --git a/giraph-core/src/main/java/org/apache/giraph/metrics/GiraphMetrics.java b/giraph-core/src/main/java/org/apache/giraph/metrics/GiraphMetrics.java index 54d960461..f4b203933 100644 --- a/giraph-core/src/main/java/org/apache/giraph/metrics/GiraphMetrics.java +++ b/giraph-core/src/main/java/org/apache/giraph/metrics/GiraphMetrics.java @@ -18,6 +18,7 @@ package org.apache.giraph.metrics; import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.utils.GcTracker; import com.google.common.collect.Lists; @@ -42,6 +43,9 @@ public class GiraphMetrics { /** registry for required per-job metrics */ private final GiraphMetricsRegistry perJobRequired; + /** Garbage collection tracker */ + private final GcTracker gcTracker; + /** observer for per-superstep metrics re-initialization */ private final List observers = Lists.newArrayList(); @@ -53,6 +57,7 @@ private GiraphMetrics() { perJobOptional = GiraphMetricsRegistry.createFake(); perSuperstep = SuperstepMetricsRegistry.createFake(); perJobRequired = GiraphMetricsRegistry.createWithOptional("giraph", "job"); + gcTracker = new GcTracker(); } /** @@ -64,6 +69,7 @@ private GiraphMetrics(GiraphConfiguration conf) { perJobOptional = GiraphMetricsRegistry.create(conf, "giraph", "job"); perSuperstep = SuperstepMetricsRegistry.create(conf, INPUT_SUPERSTEP); perJobRequired = GiraphMetricsRegistry.createWithOptional("giraph", "job"); + gcTracker = new GcTracker(conf); } /** @@ -111,6 +117,15 @@ public SuperstepMetricsRegistry perSuperstep() { return perSuperstep; } + /** + * Get GC tracker + * + * @return Gc tracker + */ + public GcTracker getGcTracker() { + return gcTracker; + } + /** * Anyone using per-superstep counters needs to re-initialize their Metrics * object on each new superstep. Otherwise they will always be updating just diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/GcTracker.java b/giraph-core/src/main/java/org/apache/giraph/utils/GcTracker.java new file mode 100644 index 000000000..234b205b0 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/utils/GcTracker.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.utils; + +import org.apache.giraph.conf.IntConfOption; +import org.apache.hadoop.conf.Configuration; + +import com.sun.management.GcInfo; + +/** + * Tracks last k GC pauses and is able to tell how much time was spent doing + * GC since some point in time. Thread safe. + */ +public class GcTracker { + /** How many last gcs to keep */ + public static final IntConfOption LAST_GCS_TO_KEEP = + new IntConfOption("giraph.lastGcsToKeep", 100, + "How many last gcs to keep"); + + /** Last k GCs which happened */ + private final GcInfo[] lastKGcs; + /** Next position to write gc to */ + private int positionInGcs = 0; + + /** + * Constructor with default number of last gcs to keep + */ + public GcTracker() { + lastKGcs = new GcInfo[LAST_GCS_TO_KEEP.getDefaultValue()]; + } + + /** + * Constructor with configuration + * + * @param conf Configuration + */ + public GcTracker(Configuration conf) { + lastKGcs = new GcInfo[LAST_GCS_TO_KEEP.get(conf)]; + } + + /** + * Called to notify gc tracker that gc occurred + * + * @param gcInfo GC info + */ + public synchronized void gcOccurred(GcInfo gcInfo) { + lastKGcs[positionInGcs] = gcInfo; + positionInGcs++; + if (positionInGcs == lastKGcs.length) { + positionInGcs = 0; + } + } + + /** + * Check how much time was spent doing gc since some timestamp + * + * @param timeSinceJvmStarted Timestamp to measure from, you can use + * ManagementFactory.getRuntimeMXBean().getUptime() to get it + * @return How much time was spent doing gc since that timestamp (if there + * were more gcs than we are keeping (LAST_GCS_TO_KEEP) then it will be + * partial result) + */ + public synchronized long gcTimeSpentSince(long timeSinceJvmStarted) { + long ret = 0; + int currentPosition = positionInGcs; + do { + currentPosition--; + if (currentPosition < 0) { + currentPosition = lastKGcs.length - 1; + } + // If there was no such GC or GC started before the timestamp given, + // stop iterating + if (lastKGcs[currentPosition] == null || + lastKGcs[currentPosition].getStartTime() < timeSinceJvmStarted) { + break; + } + ret += lastKGcs[currentPosition].getDuration(); + } while (currentPosition != positionInGcs); + return ret; + } +}