Skip to content

Commit

Permalink
services, xds, orca: use application_utilization and fallback to cpu_…
Browse files Browse the repository at this point in the history
…utilization if unset in WRR (#10256)

Implements updates to [A51][] and [A58][].

Imported cncf/xds using the import.sh script.

A51: grpc/proposal#374
A58: grpc/proposal#373
  • Loading branch information
danielzhaotongliu authored and ejona86 committed Jun 9, 2023
1 parent 311e7bc commit caeeb06
Show file tree
Hide file tree
Showing 17 changed files with 192 additions and 73 deletions.
6 changes: 3 additions & 3 deletions repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ def grpc_java_repositories():
if not native.existing_rule("com_github_cncf_xds"):
http_archive(
name = "com_github_cncf_xds",
strip_prefix = "xds-32f1caf87195bf3390061c29f18987e51ca56a88",
sha256 = "fcd0b50c013452fda9c5e28c131c287b655ebb361271a76ad3bffc08b3ecd82e",
strip_prefix = "xds-e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7",
sha256 = "0d33b83f8c6368954e72e7785539f0d272a8aba2f6e2e336ed15fd1514bc9899",
urls = [
"https://github.com/cncf/xds/archive/32f1caf87195bf3390061c29f18987e51ca56a88.tar.gz",
"https://github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz",
],
)
if not native.existing_rule("com_github_grpc_grpc"):
Expand Down
25 changes: 21 additions & 4 deletions services/src/main/java/io/grpc/services/CallMetricRecorder.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public final class CallMetricRecorder {
private final AtomicReference<ConcurrentHashMap<String, Double>> requestCostMetrics =
new AtomicReference<>();
private double cpuUtilizationMetric = 0;
private double applicationUtilizationMetric = 0;
private double memoryUtilizationMetric = 0;
private double qps = 0;
private double eps = 0;
Expand Down Expand Up @@ -127,7 +128,7 @@ public CallMetricRecorder recordRequestCostMetric(String name, double value) {
}

/**
* Records a call metric measurement for CPU utilization in the range [0, 1]. Values outside the
* Records a call metric measurement for CPU utilization in the range [0, inf). Values outside the
* valid range are ignored. If RPC has already finished, this method is no-op.
*
* <p>A later record will overwrite any earlier one.
Expand All @@ -136,13 +137,29 @@ public CallMetricRecorder recordRequestCostMetric(String name, double value) {
* @since 1.47.0
*/
public CallMetricRecorder recordCpuUtilizationMetric(double value) {
if (disabled || !MetricRecorderHelper.isCpuUtilizationValid(value)) {
if (disabled || !MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)) {
return this;
}
cpuUtilizationMetric = value;
return this;
}

/**
 * Stores the application specific utilization measured for this call.
 *
 * <p>The value is kept only when this recorder is not disabled and
 * {@code MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)} accepts it (expected
 * range [0, inf)); in every other case the recorder is left untouched. The recorder is expected
 * to be disabled once the RPC has finished, which makes this method a no-op at that point.
 *
 * <p>Each accepted value replaces the one stored by an earlier call.
 *
 * @param value the application specific utilization to record
 * @return this recorder object
 */
public CallMetricRecorder recordApplicationUtilizationMetric(double value) {
  // Short-circuits exactly like the original guard: validation is skipped when disabled.
  boolean accepted = !disabled && MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value);
  if (accepted) {
    applicationUtilizationMetric = value;
  }
  return this;
}

/**
* Records a call metric measurement for memory utilization in the range [0, 1]. Values outside
* the valid range are ignored. If RPC has already finished, this method is no-op.
Expand Down Expand Up @@ -221,8 +238,8 @@ MetricReport finalizeAndDump2() {
if (savedUtilizationMetrics == null) {
savedUtilizationMetrics = Collections.emptyMap();
}
return new MetricReport(cpuUtilizationMetric, memoryUtilizationMetric, qps, eps,
Collections.unmodifiableMap(savedRequestCostMetrics),
return new MetricReport(cpuUtilizationMetric, applicationUtilizationMetric,
memoryUtilizationMetric, qps, eps, Collections.unmodifiableMap(savedRequestCostMetrics),
Collections.unmodifiableMap(savedUtilizationMetrics)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ public static MetricReport finalizeAndDump2(CallMetricRecorder recorder) {
return recorder.finalizeAndDump2();
}

public static MetricReport createMetricReport(double cpuUtilization, double memoryUtilization,
double qps, double eps, Map<String, Double> requestCostMetrics,
Map<String, Double> utilizationMetrics) {
return new MetricReport(cpuUtilization, memoryUtilization, qps, eps, requestCostMetrics,
utilizationMetrics);
/**
 * Builds a {@code MetricReport} by forwarding the given values, in the same order, to the
 * {@code MetricReport} constructor.
 *
 * @param cpuUtilization the cpu utilization value
 * @param applicationUtilization the application specific utilization value
 * @param memoryUtilization the memory utilization value
 * @param qps the queries-per-second value
 * @param eps the errors-per-second value
 * @param requestCostMetrics the request cost metrics, passed through as given
 * @param utilizationMetrics the named utilization metrics, passed through as given
 * @return the newly created report
 */
public static MetricReport createMetricReport(double cpuUtilization,
    double applicationUtilization, double memoryUtilization, double qps, double eps,
    Map<String, Double> requestCostMetrics, Map<String, Double> utilizationMetrics) {
  MetricReport report = new MetricReport(cpuUtilization, applicationUtilization,
      memoryUtilization, qps, eps, requestCostMetrics, utilizationMetrics);
  return report;
}
}
23 changes: 21 additions & 2 deletions services/src/main/java/io/grpc/services/MetricRecorder.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
public final class MetricRecorder {
private volatile ConcurrentHashMap<String, Double> metricsData = new ConcurrentHashMap<>();
private volatile double cpuUtilization;
private volatile double applicationUtilization;
private volatile double memoryUtilization;
private volatile double qps;
private volatile double eps;
Expand Down Expand Up @@ -69,7 +70,7 @@ public void removeUtilizationMetric(String key) {
* are ignored.
*/
public void setCpuUtilizationMetric(double value) {
if (!MetricRecorderHelper.isCpuUtilizationValid(value)) {
if (!MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)) {
return;
}
cpuUtilization = value;
Expand All @@ -82,6 +83,24 @@ public void clearCpuUtilizationMetric() {
cpuUtilization = 0;
}

/**
 * Replaces the stored application specific utilization with {@code value}.
 *
 * <p>The update happens only when
 * {@code MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)} accepts the value
 * (expected range [0, inf)); a rejected value is ignored and the stored value is unchanged.
 *
 * @param value the application specific utilization to store
 */
public void setApplicationUtilizationMetric(double value) {
  if (MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)) {
    applicationUtilization = value;
  }
}

/**
 * Resets the stored application specific utilization to zero.
 */
public void clearApplicationUtilizationMetric() {
  this.applicationUtilization = 0.0;
}

/**
* Update the memory utilization metrics data in the range [0, 1]. Values outside the valid range
* are ignored.
Expand Down Expand Up @@ -135,7 +154,7 @@ public void clearEpsMetric() {
}

MetricReport getMetricReport() {
return new MetricReport(cpuUtilization, memoryUtilization, qps, eps,
return new MetricReport(cpuUtilization, applicationUtilization, memoryUtilization, qps, eps,
Collections.emptyMap(), Collections.unmodifiableMap(metricsData));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ static boolean isUtilizationValid(double utilization) {
}

/**
* Return true if the cpu utilization value is in the range [0, inf) and false otherwise.
* Occasionally users have over 100% cpu utilization and get a runaway effect where the backend
* with highest qps gets more and more qps sent to it. So we allow cpu utilization > 1.0.
* Return true if the cpu utilization or application specific utilization value is in the range
* [0, inf) and false otherwise. Occasionally users have over 100% cpu utilization and get a
* runaway effect where the backend with highest qps gets more and more qps sent to it. So we
* allow cpu utilization > 1.0, similarly for application specific utilization.
*/
static boolean isCpuUtilizationValid(double utilization) {
/**
 * Returns true only when {@code utilization} lies in the half-open range [0, inf).
 *
 * <p>Values above 1.0 are accepted on purpose. Negative values, NaN and positive infinity are
 * rejected: infinity is outside the documented range and is not a usable measurement.
 *
 * @param utilization the cpu or application specific utilization value to check
 * @return true if the value is a non-negative, non-infinite number; false otherwise
 */
static boolean isCpuOrApplicationUtilizationValid(double utilization) {
  // NaN already fails the >= comparison; infinity passes it and needs the explicit check.
  return utilization >= 0.0 && !Double.isInfinite(utilization);
}

Expand Down
13 changes: 10 additions & 3 deletions services/src/main/java/io/grpc/services/MetricReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@
@ExperimentalApi("https://github.com/grpc/grpc-java/issues/9381")
public final class MetricReport {
private double cpuUtilization;
private double applicationUtilization;
private double memoryUtilization;
private double qps;
private double eps;
private Map<String, Double> requestCostMetrics;
private Map<String, Double> utilizationMetrics;

MetricReport(double cpuUtilization, double memoryUtilization, double qps, double eps,
Map<String, Double> requestCostMetrics,
Map<String, Double> utilizationMetrics) {
MetricReport(double cpuUtilization, double applicationUtilization, double memoryUtilization,
double qps, double eps, Map<String, Double> requestCostMetrics,
Map<String, Double> utilizationMetrics) {
this.cpuUtilization = cpuUtilization;
this.applicationUtilization = applicationUtilization;
this.memoryUtilization = memoryUtilization;
this.qps = qps;
this.eps = eps;
Expand All @@ -50,6 +52,10 @@ public double getCpuUtilization() {
return cpuUtilization;
}

/**
 * Returns the application specific utilization stored in this report.
 *
 * @return the value supplied to this report's constructor
 */
public double getApplicationUtilization() {
  return this.applicationUtilization;
}

public double getMemoryUtilization() {
return memoryUtilization;
}
Expand All @@ -74,6 +80,7 @@ public double getEps() {
public String toString() {
return MoreObjects.toStringHelper(this)
.add("cpuUtilization", cpuUtilization)
.add("applicationUtilization", applicationUtilization)
.add("memoryUtilization", memoryUtilization)
.add("requestCost", requestCostMetrics)
.add("utilization", utilizationMetrics)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public void dumpDumpsAllSavedMetricValues() {
recorder.recordRequestCostMetric("cost2", 10293.0);
recorder.recordRequestCostMetric("cost3", 1.0);
recorder.recordCpuUtilizationMetric(0.1928);
recorder.recordApplicationUtilizationMetric(0.9987);
recorder.recordMemoryUtilizationMetric(0.474);
recorder.recordQpsMetric(2522.54);
recorder.recordEpsMetric(1.618);
Expand All @@ -55,15 +56,18 @@ public void dumpDumpsAllSavedMetricValues() {
Truth.assertThat(dump.getRequestCostMetrics())
.containsExactly("cost1", 37465.12, "cost2", 10293.0, "cost3", 1.0);
Truth.assertThat(dump.getCpuUtilization()).isEqualTo(0.1928);
Truth.assertThat(dump.getApplicationUtilization()).isEqualTo(0.9987);
Truth.assertThat(dump.getMemoryUtilization()).isEqualTo(0.474);
Truth.assertThat(dump.getQps()).isEqualTo(2522.54);
Truth.assertThat(dump.getEps()).isEqualTo(1.618);
Truth.assertThat(dump.toString()).contains("eps=1.618");
Truth.assertThat(dump.toString()).contains("applicationUtilization=0.9987");
}

@Test
public void noMetricsRecordedAfterSnapshot() {
Map<String, Double> initDump = recorder.finalizeAndDump();
recorder.recordApplicationUtilizationMetric(0.01);
recorder.recordUtilizationMetric("cost", 0.154353423);
recorder.recordQpsMetric(3.14159);
recorder.recordEpsMetric(1.618);
Expand All @@ -87,13 +91,15 @@ public void noMetricsRecordedIfUtilizationIsGreaterThanUpperBound() {
@Test
public void noMetricsRecordedIfUtilizationAndQpsAreLessThanLowerBound() {
recorder.recordCpuUtilizationMetric(-0.001);
recorder.recordApplicationUtilizationMetric(-0.001);
recorder.recordMemoryUtilizationMetric(-0.001);
recorder.recordQpsMetric(-0.001);
recorder.recordEpsMetric(-0.001);
recorder.recordUtilizationMetric("util1", -0.001);

MetricReport dump = recorder.finalizeAndDump2();
Truth.assertThat(dump.getCpuUtilization()).isEqualTo(0);
Truth.assertThat(dump.getApplicationUtilization()).isEqualTo(0);
Truth.assertThat(dump.getMemoryUtilization()).isEqualTo(0);
Truth.assertThat(dump.getQps()).isEqualTo(0);
Truth.assertThat(dump.getEps()).isEqualTo(0);
Expand All @@ -108,6 +114,8 @@ public void lastValueWinForMetricsWithSameName() {
recorder.recordRequestCostMetric("cost1", 6441.341);
recorder.recordRequestCostMetric("cost1", 4654.67);
recorder.recordRequestCostMetric("cost2", 75.83);
recorder.recordApplicationUtilizationMetric(0.92);
recorder.recordApplicationUtilizationMetric(1.78);
recorder.recordMemoryUtilizationMetric(0.13);
recorder.recordMemoryUtilizationMetric(0.31);
recorder.recordUtilizationMetric("util1", 0.2837421);
Expand All @@ -121,6 +129,7 @@ public void lastValueWinForMetricsWithSameName() {
MetricReport dump = recorder.finalizeAndDump2();
Truth.assertThat(dump.getRequestCostMetrics())
.containsExactly("cost1", 4654.67, "cost2", 75.83);
Truth.assertThat(dump.getApplicationUtilization()).isEqualTo(1.78);
Truth.assertThat(dump.getMemoryUtilization()).isEqualTo(0.93840);
Truth.assertThat(dump.getUtilizationMetrics())
.containsExactly("util1", 0.843233);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,16 @@ final class OrcaReportListener implements OrcaPerRequestReportListener, OrcaOobR
@Override
public void onLoadReport(MetricReport report) {
double newWeight = 0;
if (report.getCpuUtilization() > 0 && report.getQps() > 0) {
// Prefer application utilization and fallback to CPU utilization if unset.
double utilization =
report.getApplicationUtilization() > 0 ? report.getApplicationUtilization()
: report.getCpuUtilization();
if (utilization > 0 && report.getQps() > 0) {
double penalty = 0;
if (report.getEps() > 0 && errorUtilizationPenalty > 0) {
penalty = report.getEps() / report.getQps() * errorUtilizationPenalty;
}
newWeight = report.getQps() / (report.getCpuUtilization() + penalty);
newWeight = report.getQps() / (utilization + penalty);
}
if (newWeight == 0) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ public void close(Status status, Metadata trailers) {
private static OrcaLoadReport.Builder fromInternalReport(MetricReport internalReport) {
return OrcaLoadReport.newBuilder()
.setCpuUtilization(internalReport.getCpuUtilization())
.setApplicationUtilization(internalReport.getApplicationUtilization())
.setMemUtilization(internalReport.getMemoryUtilization())
.setRpsFractional(internalReport.getQps())
.setEps(internalReport.getEps())
Expand All @@ -138,6 +139,10 @@ private static void mergeMetrics(
if (isReportValueSet(cpu)) {
metricRecorderReportBuilder.setCpuUtilization(cpu);
}
double applicationUtilization = callMetricRecorderReport.getApplicationUtilization();
if (isReportValueSet(applicationUtilization)) {
metricRecorderReportBuilder.setApplicationUtilization(applicationUtilization);
}
double mem = callMetricRecorderReport.getMemoryUtilization();
if (isReportValueSet(mem)) {
metricRecorderReportBuilder.setMemUtilization(mem);
Expand Down
5 changes: 3 additions & 2 deletions xds/src/main/java/io/grpc/xds/orca/OrcaPerRequestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,9 @@ public void inboundTrailers(Metadata trailers) {

static MetricReport fromOrcaLoadReport(OrcaLoadReport loadReport) {
return InternalCallMetricRecorder.createMetricReport(loadReport.getCpuUtilization(),
loadReport.getMemUtilization(), loadReport.getRpsFractional(), loadReport.getEps(),
loadReport.getRequestCostMap(), loadReport.getUtilizationMap());
loadReport.getApplicationUtilization(), loadReport.getMemUtilization(),
loadReport.getRpsFractional(), loadReport.getEps(), loadReport.getRequestCostMap(),
loadReport.getUtilizationMap());
}

/**
Expand Down
1 change: 1 addition & 0 deletions xds/src/main/java/io/grpc/xds/orca/OrcaServiceImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ private OrcaLoadReport generateMetricsReport() {
MetricReport internalReport =
InternalMetricRecorder.getMetricReport(metricRecorder);
return OrcaLoadReport.newBuilder().setCpuUtilization(internalReport.getCpuUtilization())
.setApplicationUtilization(internalReport.getApplicationUtilization())
.setMemUtilization(internalReport.getMemoryUtilization())
.setRpsFractional(internalReport.getQps())
.setEps(internalReport.getEps())
Expand Down
Loading

0 comments on commit caeeb06

Please sign in to comment.