Skip to content

Commit 77b34cc

Browse files
authored
[improve][test] Improve integration test profiling test example (#24701)
1 parent cc824e5 commit 77b34cc

File tree

7 files changed

+149
-36
lines changed

7 files changed

+149
-36
lines changed

tests/docker-images/java-test-image/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ USER root
2626
COPY target/scripts /pulsar/bin
2727
RUN chmod a+rx /pulsar/bin/*
2828

29-
RUN apk add --no-cache supervisor
29+
RUN apk add --no-cache supervisor jq
3030

3131
RUN mkdir -p /var/log/pulsar \
3232
&& mkdir -p /var/run/supervisor/ \

tests/docker-images/latest-version-image/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ FROM $PULSAR_ALL_IMAGE
6262
# However, any processes exec'ing into the containers will run as root, by default.
6363
USER root
6464

65-
RUN apk add --no-cache supervisor procps curl
65+
RUN apk add --no-cache supervisor procps curl jq
6666

6767
RUN mkdir -p /var/log/pulsar && mkdir -p /var/run/supervisor/
6868

tests/docker-images/latest-version-image/conf/supervisord.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ loglevel=info
2626
pidfile=/var/run/supervisord.pid
2727
minfds=1024
2828
minprocs=200
29+
user=root
2930

3031
[unix_http_server]
3132
file=/var/run/supervisor/supervisor.sock

tests/docker-images/latest-version-image/scripts/func-lib.sh

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,30 @@
2121
set -e
2222
set -o pipefail
2323

24+
#######################################
# Export PULSAR_MEM for the JVM started by the Pulsar scripts.
# A non-empty PULSAR_MEM already present in the environment is kept as the
# base value; otherwise the base is "-Xmx<maxMem>" followed by the optional
# additional parameters. The heap-dump-on-OOME and exit-on-OOME flags are
# appended in both cases.
# Globals:   PULSAR_MEM (read, then exported)
# Arguments: $1 - maximum heap size handed to -Xmx (e.g. 512M)
#            $2 - additional JVM memory parameters (may be omitted)
#######################################
set_pulsar_mem() {
  local maxMem=$1
  local additionalMemParam=$2
  local oome_flags="-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar -XX:+ExitOnOutOfMemoryError"
  local fallback_mem
  # `read` strips the leading/trailing whitespace that is left over when the
  # additional parameters are empty
  read -r fallback_mem <<< "-Xmx${maxMem} ${additionalMemParam}"
  # a caller-provided PULSAR_MEM wins over the computed fallback
  PULSAR_MEM="${PULSAR_MEM:-${fallback_mem}} ${oome_flags}"
  export PULSAR_MEM
}
33+
2434
function run_pulsar_component() {
2535
local component=$1
2636
local supervisord_component=$2
2737
local maxMem=$3
2838
local additionalMemParam=$4
29-
export PULSAR_MEM="${PULSAR_MEM:-"-Xmx${maxMem} ${additionalMemParam} -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar -XX:+ExitOnOutOfMemoryError"}"
30-
export PULSAR_GC="${PULSAR_GC:-"-XX:+UseZGC"}"
39+
40+
set_pulsar_mem "$maxMem" "$additionalMemParam"
3141

3242
if [[ -f "conf/${component}.conf" ]]; then
3343
bin/apply-config-from-env.py conf/${component}.conf
3444
fi
35-
bin/apply-config-from-env.py conf/pulsar_env.sh
45+
bin/apply-config-from-env.py conf/client.conf
3646

3747
if [[ "$component" == "functions_worker" ]]; then
38-
bin/apply-config-from-env.py conf/client.conf
3948
bin/gen-yml-from-env.py conf/functions_worker.yml
4049
fi
4150

@@ -46,7 +55,7 @@ function run_pulsar_component() {
4655
fi
4756

4857
if [ -z "$NO_AUTOSTART" ]; then
49-
sed -i 's/autostart=.*/autostart=true/' /etc/supervisord/conf.d/${supervisord_component}.conf
58+
sed -i 's/autostart=.*/autostart=true/' /etc/supervisord/conf.d/${supervisord_component}.conf
5059
fi
5160

5261
exec /usr/bin/supervisord -c /etc/supervisord.conf

tests/docker-images/latest-version-image/scripts/run-standalone.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
# under the License.
1919
#
2020

21-
export PULSAR_MEM="${PULSAR_MEM:-"-Xmx512M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar -XX:+ExitOnOutOfMemoryError"}"
22-
export PULSAR_GC="${PULSAR_GC:-"-XX:+UseZGC"}"
21+
source /pulsar/bin/func-lib.sh
22+
23+
set_pulsar_mem 512M
2324

2425
bin/pulsar standalone

tests/integration/src/test/java/org/apache/pulsar/tests/integration/containers/ChaosContainer.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ protected ChaosContainer(String clusterName, String image) {
4444
@Override
4545
protected void configure() {
4646
super.configure();
47-
addEnv("MALLOC_ARENA_MAX", "1");
4847
}
4948

5049
protected void appendToEnv(String key, String value) {

tests/integration/src/test/java/org/apache/pulsar/tests/integration/profiling/PulsarProfilingTest.java

Lines changed: 129 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
*/
1919
package org.apache.pulsar.tests.integration.profiling;
2020

21+
import java.io.File;
22+
import java.io.IOException;
23+
import java.io.UncheckedIOException;
24+
import java.nio.file.Files;
25+
import java.nio.file.attribute.PosixFilePermissions;
2126
import java.util.HashMap;
2227
import java.util.List;
2328
import java.util.Map;
@@ -31,6 +36,7 @@
3136
import org.apache.pulsar.tests.integration.suites.PulsarTestSuite;
3237
import org.apache.pulsar.tests.integration.topologies.PulsarClusterSpec;
3338
import org.apache.pulsar.tests.integration.utils.DockerUtils;
39+
import org.testcontainers.containers.BindMode;
3440
import org.testcontainers.containers.GenericContainer;
3541
import org.testng.annotations.Test;
3642

@@ -40,9 +46,9 @@
4046
* Example usage:
4147
* # This has been tested on Mac with Orbstack (https://orbstack.dev/) docker
4248
* # compile integration test dependencies
43-
* mvn -am -pl tests/integration -DskipTests install
49+
* mvn -am -pl tests/integration -Dcheckstyle.skip=true -Dlicense.skip=true -Dspotbugs.skip=true -DskipTests install
4450
* # compile apachepulsar/java-test-image with async profiler (add "clean" to ensure a clean build with recent changes)
45-
* ./build/build_java_test_image.sh -Ddocker.install.asyncprofiler=true
51+
* ./build/build_java_test_image.sh -Ddocker.install.asyncprofiler=true -Pdocker-wolfi
4652
* # set environment variables
4753
* export PULSAR_TEST_IMAGE_NAME=apachepulsar/java-test-image:latest
4854
* export NETTY_LEAK_DETECTION=off
@@ -92,31 +98,98 @@ public PulsarPerfContainer(String clusterName,
9298
createContainerCmd.withName(clusterName + "-" + hostname);
9399
});
94100
withEnv("PULSAR_MEM", DEFAULT_PULSAR_MEM);
101+
withEnv("PULSAR_GC", "-XX:+UseZGC -XX:+ZGenerational");
95102
setCommand("sleep 1000000");
103+
File testOutputDir = new File("target");
104+
if (!testOutputDir.exists()) {
105+
if (!testOutputDir.mkdirs()) {
106+
throw new IllegalArgumentException("Test output directory + '" + testOutputDir.getAbsolutePath()
107+
+ "' doesn't exist and cannot be created.");
108+
}
109+
}
110+
if (!testOutputDir.isDirectory()) {
111+
throw new IllegalArgumentException(
112+
"Test output directory '" + testOutputDir.getAbsolutePath() + "' isn't a directory.");
113+
}
114+
// change access to testOutputDir to allow all access so that the container user can write to it
115+
// This matters only on Linux
116+
try {
117+
Files.setPosixFilePermissions(testOutputDir.toPath(), PosixFilePermissions.fromString("rwxrwxrwx"));
118+
} catch (IOException e) {
119+
throw new UncheckedIOException("Cannot change access to test output directory", e);
120+
}
121+
withFileSystemBind(testOutputDir.getAbsolutePath(), "/testoutput", BindMode.READ_WRITE);
96122
}
97123

98124
public CompletableFuture<Long> consume(String topicName) throws Exception {
99125
return DockerUtils.runCommandAsyncWithLogging(getDockerClient(), getContainerId(),
100-
"/pulsar/bin/pulsar-perf", "consume", topicName,
101-
"-u", "pulsar://" + brokerHostname + ":6650",
102-
"-st", "Shared",
103-
"-aq",
104-
"-m", String.valueOf(numberOfMessages), "-ml", "400M");
126+
"bash", "-c", "echo $$ > /tmp/command.pid; "
127+
+ "/pulsar/bin/pulsar-perf consume " + topicName + " "
128+
+ "-u pulsar://" + brokerHostname + ":6650 "
129+
+ "-st Shared "
130+
+ "-q 50000 "
131+
+ "-m " + numberOfMessages + " -ml 400M "
132+
+ "--histogram-file=/testoutput/consume.histogram.$(date +%s).hdr "
133+
+ "2>&1 | tee /testoutput/consume.$(date +%s).txt");
105134
}
106135

107136
public CompletableFuture<Long> produce(String topicName) throws Exception {
108137
return DockerUtils.runCommandAsyncWithLogging(getDockerClient(), getContainerId(),
109-
"/pulsar/bin/pulsar-perf", "produce", topicName,
110-
"-u", "pulsar://" + brokerHostname + ":6650",
111-
"-au", "http://" + brokerHostname + ":8080",
112-
"-r", String.valueOf(Integer.MAX_VALUE), // max-rate
113-
"-s", "8192", // 8kB message size
114-
"-m", String.valueOf(numberOfMessages), "-ml", "400M");
138+
"bash", "-c", "echo $$ > /tmp/command.pid; "
139+
+ "/pulsar/bin/pulsar-perf produce " + topicName + " "
140+
+ "-u pulsar://" + brokerHostname + ":6650 "
141+
+ "-au http://" + brokerHostname + ":8080 "
142+
+ "-r " + Integer.MAX_VALUE + " "
143+
+ "-s 128 -db "
144+
+ "-o 20000 "
145+
+ "-m " + numberOfMessages + " -ml 400M "
146+
+ "--histogram-file=/testoutput/produce.histogram.$(date +%s).hdr "
147+
+ "2>&1 | tee /testoutput/produce.$(date +%s).txt");
148+
}
149+
150+
public CompletableFuture<Long> stats(String topicName) throws Exception {
151+
String basePath = "http://" + brokerHostname + ":8080/admin/v2/" + topicName.replace("://", "/");
152+
// print out stats and internal stats every 10 seconds
153+
return DockerUtils.runCommandAsyncWithLogging(getDockerClient(), getContainerId(),
154+
"bash", "-c",
155+
String.format("echo $$ > /tmp/command.pid; "
156+
+ "while [[ 1 ]]; do "
157+
+ "curl -s %s/stats | jq | tee /testoutput/stats.$(date +%%s).txt; "
158+
+ "sleep 1; "
159+
+ "curl -s %s/internalStats | jq | tee /testoutput/internal_stats.$(date +%%s).txt; "
160+
+ "curl -s http://%s:8080/metrics/ > /testoutput/metrics.$(date +%%s).txt; "
161+
+ " sleep 10; "
162+
+ "done",
163+
basePath, basePath, brokerHostname));
164+
}
165+
166+
public void triggerShutdown() {
167+
if (isRunning()) {
168+
// attempt to stop containers gracefully
169+
DockerUtils.runCommandAsyncWithLogging(getDockerClient(), getContainerId(),
170+
"bash", "-c", "pkill java; while pgrep -c java; do "
171+
+ "echo Waiting for java processes to stop.; sleep 1; done; "
172+
+ "kill $(cat /tmp/command.pid)")
173+
.orTimeout(10, TimeUnit.SECONDS)
174+
.exceptionally(t -> null)
175+
.join();
176+
}
177+
}
178+
179+
public void stop() {
180+
if (isRunning()) {
181+
// attempt to stop containers gracefully
182+
dockerClient.stopContainerCmd(getContainerId())
183+
.withTimeout(15)
184+
.exec();
185+
}
186+
super.stop();
115187
}
116188
}
117189

118190
private PulsarPerfContainer perfConsume;
119191
private PulsarPerfContainer perfProduce;
192+
private PulsarPerfContainer printStats;
120193

121194
@Override
122195
public void setupCluster() throws Exception {
@@ -126,14 +199,27 @@ public void setupCluster() throws Exception {
126199

127200
@Override
128201
public void tearDownCluster() throws Exception {
202+
if (printStats != null) {
203+
printStats.triggerShutdown();
204+
}
205+
if (perfProduce != null) {
206+
perfProduce.triggerShutdown();
207+
}
129208
if (perfConsume != null) {
130-
perfConsume.stop();
131-
perfConsume = null;
209+
perfConsume.triggerShutdown();
210+
}
211+
if (printStats != null) {
212+
printStats.stop();
213+
printStats = null;
132214
}
133215
if (perfProduce != null) {
134216
perfProduce.stop();
135217
perfProduce = null;
136218
}
219+
if (perfConsume != null) {
220+
perfConsume.stop();
221+
perfConsume = null;
222+
}
137223
super.tearDownCluster();
138224
}
139225

@@ -142,7 +228,10 @@ protected void beforeStartCluster() throws Exception {
142228
super.beforeStartCluster();
143229
pulsarCluster.forEachContainer(
144230
// This is effective only when -Pdocker-wolfi has been passed when building java-test-image
145-
c -> c.withEnv("GLIBC_TUNABLES", "glibc.malloc.hugetlb=1:glibc.malloc.mmap_threshold=2097152"));
231+
// setting mmap_threshold explicitly will avoid its dynamic increase
232+
// https://sourceware.org/glibc/manual/latest/html_node/Memory-Allocation-Tunables.html
233+
c -> c.withEnv("GLIBC_TUNABLES",
234+
"glibc.malloc.hugetlb=1:glibc.malloc.mmap_threshold=131072:glibc.malloc.arena_max=4"));
146235
}
147236

148237
@Override
@@ -160,15 +249,25 @@ protected PulsarClusterSpec.PulsarClusterSpecBuilder beforeSetupCluster(String c
160249
specBuilder.numProxies(0);
161250

162251
// Increase memory for brokers and configure more aggressive rollover
163-
specBuilder.brokerEnvs(Map.of("PULSAR_MEM", BROKER_PULSAR_MEM,
164-
"managedLedgerMinLedgerRolloverTimeMinutes", "1",
165-
"managedLedgerMaxLedgerRolloverTimeMinutes", "5",
166-
"managedLedgerMaxSizePerLedgerMbytes", "512",
167-
"managedLedgerDefaultEnsembleSize", "1",
168-
"managedLedgerDefaultWriteQuorum", "1",
169-
"managedLedgerDefaultAckQuorum", "1",
170-
"maxPendingPublishRequestsPerConnection", "100000"
171-
));
252+
Map<String, String> brokerEnvs = new HashMap<>();
253+
brokerEnvs.put("PULSAR_MEM", BROKER_PULSAR_MEM);
254+
brokerEnvs.put("managedLedgerMinLedgerRolloverTimeMinutes", "1");
255+
brokerEnvs.put("managedLedgerMaxLedgerRolloverTimeMinutes", "5");
256+
brokerEnvs.put("managedLedgerMaxSizePerLedgerMbytes", "512");
257+
brokerEnvs.put("managedLedgerDefaultEnsembleSize", "1");
258+
brokerEnvs.put("managedLedgerDefaultWriteQuorum", "1");
259+
brokerEnvs.put("managedLedgerDefaultAckQuorum", "1");
260+
//brokerEnvs.put("maxPendingPublishRequestsPerConnection", "1000");
261+
brokerEnvs.put("dispatcherRetryBackoffInitialTimeInMs", "0");
262+
brokerEnvs.put("dispatcherRetryBackoffMaxTimeInMs", "0");
263+
brokerEnvs.put("preciseDispatcherFlowControl", "true");
264+
//brokerEnvs.put("PULSAR_PREFIX_subscriptionKeySharedUseClassicPersistentImplementation", "true");
265+
//brokerEnvs.put("PULSAR_PREFIX_subscriptionSharedUseClassicPersistentImplementation", "true");
266+
brokerEnvs.put("dispatcherMaxReadBatchSize", "1000");
267+
//brokerEnvs.put("dispatcherMaxReadSizeBytes", "10000000");
268+
//brokerEnvs.put("dispatcherDispatchMessagesInSubscriptionThread", "false");
269+
//brokerEnvs.put("dispatcherMaxRoundRobinBatchSize", "1000");
270+
specBuilder.brokerEnvs(brokerEnvs);
172271

173272
// Increase memory for bookkeepers and make compaction run more often
174273
Map<String, String> bkEnv = new HashMap<>();
@@ -190,9 +289,11 @@ protected PulsarClusterSpec.PulsarClusterSpecBuilder beforeSetupCluster(String c
190289
String brokerHostname = clusterName + "-pulsar-broker-0";
191290
perfProduce = new PulsarPerfContainer(clusterName, brokerHostname, "perf-produce");
192291
perfConsume = new PulsarPerfContainer(clusterName, brokerHostname, "perf-consume");
292+
printStats = new PulsarPerfContainer(clusterName, brokerHostname, "print-stats");
193293
specBuilder.externalServices(Map.of(
194294
"pulsar-produce", perfProduce,
195-
"pulsar-consume", perfConsume
295+
"pulsar-consume", perfConsume,
296+
"print-stats", printStats
196297
));
197298

198299
return specBuilder;
@@ -204,6 +305,8 @@ public void runPulsarPerf() throws Exception {
204305
CompletableFuture<Long> consumeFuture = perfConsume.consume(topicName);
205306
Thread.sleep(1000);
206307
CompletableFuture<Long> produceFuture = perfProduce.produce(topicName);
308+
Thread.sleep(4000);
309+
printStats.stats(topicName);
207310
FutureUtil.waitForAll(List.of(consumeFuture, produceFuture))
208311
.orTimeout(3, TimeUnit.MINUTES)
209312
.exceptionally(t -> {

0 commit comments

Comments
 (0)