Skip to content

Commit

Permalink
Fix that e2e fails occasionally (#3125)
Browse files Browse the repository at this point in the history
* Override ES settings in FAQ

* Resend traffic data when invalid

* Remove unsupported ES setting

* Use separate buffer directories and remove unnecessary web app 2
  • Loading branch information
kezhenxu94 authored and wu-sheng committed Jul 20, 2019
1 parent 1a36c38 commit 758b085
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 56 deletions.
2 changes: 2 additions & 0 deletions test/e2e/e2e-cluster/test-runner/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
</wait>
<env>
<discovery.type>single-node</discovery.type>
<thread_pool.index.queue_size>500</thread_pool.index.queue_size>
<thread_pool.write.queue_size>500</thread_pool.write.queue_size>
</env>
</run>
</image>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if test "${MODE}" = "cluster"; then
&& mv clusterized_app.yml application.yml

cd ${SW_HOME}/webapp \
&& awk '/^\s+listOfServers/ {gsub("127.0.0.1:12800", "127.0.0.1:12800,127.0.0.1:12801", $0)} {print}' webapp.yml > clusterized_webapp.yml \
&& awk '/^\s+listOfServers:/ {gsub("listOfServers:.*", "listOfServers: 127.0.0.1:12800,127.0.0.1:12801", $0)} {print}' webapp.yml > clusterized_webapp.yml \
&& mv clusterized_webapp.yml webapp.yml

cd ${original_wd}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
echo 'starting OAP server...' \
&& SW_STORAGE_ES_BULK_ACTIONS=1 \
&& SW_STORAGE_ES_FLUSH_INTERVAL=1 \
&& SW_RECEIVER_BUFFER_PATH=/tmp/oap/trace_buffer1 \
&& SW_SERVICE_MESH_BUFFER_PATH=/tmp/oap/mesh_buffer1 \
&& start_oap 'init'

echo 'starting Web app...' \
Expand All @@ -31,11 +33,9 @@ if test "${MODE}" = "cluster"; then
&& SW_CORE_REST_PORT=12801 \
&& SW_STORAGE_ES_BULK_ACTIONS=1 \
&& SW_STORAGE_ES_FLUSH_INTERVAL=1 \
&& SW_RECEIVER_BUFFER_PATH=/tmp/oap/trace_buffer2 \
&& SW_SERVICE_MESH_BUFFER_PATH=/tmp/oap/mesh_buffer2 \
&& start_oap 'no-init'

# start another WebApp server in a different port
echo 'starting Web app...' \
&& start_webapp '0.0.0.0' 8082
fi

echo 'starting instrumented services...' && start_instrumented_services
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,33 +95,21 @@ public void setUp() {
public void verify() throws Exception {
LocalDateTime startTime = LocalDateTime.now(ZoneOffset.UTC);

final Map<String, String> user = new HashMap<>();
user.put("name", "SkyWalking");
// minimum guarantee that the instrumented services registered
// which is the prerequisite of following verifications(service instance, service metrics, etc.)
List<Service> services = Collections.emptyList();
while (services.size() < 2) {
try {
restTemplate.postForEntity(
instrumentedServiceUrl + "/e2e/users",
user,
String.class
);
services = queryClient.services(
new ServicesQuery()
.start(startTime)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
);
Thread.sleep(500); // take a nap to avoid high payload
} catch (Throwable ignored) {
}
}

final ResponseEntity<String> responseEntity = restTemplate.postForEntity(
instrumentedServiceUrl + "/e2e/users",
user,
String.class
);
LOGGER.info("responseEntity: {}, {}", responseEntity.getStatusCode(), responseEntity.getBody());
assertThat(responseEntity.getStatusCode()).isEqualTo(HttpStatus.OK);

verifyTraces(startTime);

verifyServices(startTime);
Expand Down Expand Up @@ -155,7 +143,7 @@ private void verifyServices(LocalDateTime minutesAgo) throws Exception {
services = queryClient.services(
new ServicesQuery()
.start(minutesAgo)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
.end(LocalDateTime.now(ZoneOffset.UTC))
);
Thread.sleep(retryInterval);
}
Expand Down Expand Up @@ -189,14 +177,15 @@ private Instances verifyServiceInstances(LocalDateTime minutesAgo, Service servi
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
);
while (instances == null) {
LOGGER.warn("instances is null, will retry to query");
LOGGER.warn("instances is null, will send traffic data and retry to query");
generateTraffic();
Thread.sleep(retryInterval);
instances = queryClient.instances(
new InstancesQuery()
.serviceId(service.getKey())
.start(minutesAgo)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
);
Thread.sleep(retryInterval);
}
InputStream expectedInputStream =
new ClassPathResource("expected-data/org.apache.skywalking.e2e.ClusterVerificationITCase.instances.yml").getInputStream();
Expand All @@ -210,11 +199,12 @@ private Endpoints verifyServiceEndpoints(LocalDateTime minutesAgo, Service servi
new EndpointQuery().serviceId(service.getKey())
);
while (endpoints == null) {
LOGGER.warn("endpoints is null, will retry to query");
LOGGER.warn("endpoints is null, will send traffic data and retry to query");
generateTraffic();
Thread.sleep(retryInterval);
endpoints = queryClient.endpoints(
new EndpointQuery().serviceId(service.getKey())
);
Thread.sleep(retryInterval);
}
InputStream expectedInputStream =
new ClassPathResource("expected-data/org.apache.skywalking.e2e.ClusterVerificationITCase.endpoints.yml").getInputStream();
Expand All @@ -228,8 +218,8 @@ private void verifyInstancesMetrics(Instances instances, final LocalDateTime min
for (String metricsName : ALL_INSTANCE_METRICS) {
LOGGER.info("verifying service instance response time: {}", instance);

boolean matched = false;
while (!matched) {
boolean valid = false;
while (!valid) {
LOGGER.warn("instanceRespTime is null, will retry to query");
Metrics instanceRespTime = queryClient.metrics(
new MetricsQuery()
Expand All @@ -245,8 +235,9 @@ private void verifyInstancesMetrics(Instances instances, final LocalDateTime min
instanceRespTimeMatcher.setValue(greaterThanZero);
try {
instanceRespTimeMatcher.verify(instanceRespTime);
matched = true;
valid = true;
} catch (Throwable ignored) {
generateTraffic();
Thread.sleep(retryInterval);
}
LOGGER.info("{}: {}", metricsName, instanceRespTime);
Expand All @@ -263,28 +254,28 @@ private void verifyEndpointsMetrics(Endpoints endpoints, final LocalDateTime min
for (String metricName : ALL_ENDPOINT_METRICS) {
LOGGER.info("verifying endpoint {}, metrics: {}", endpoint, metricName);

boolean matched = false;
while (!matched) {
LOGGER.warn("serviceMetrics is null, will retry to query");
Metrics metrics = queryClient.metrics(
boolean valid = false;
while (!valid) {
Metrics endpointMetrics = queryClient.metrics(
new MetricsQuery()
.stepByMinute()
.metricsName(metricName)
.start(minutesAgo)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
.end(LocalDateTime.now(ZoneOffset.UTC))
.id(endpoint.getKey())
);
AtLeastOneOfMetricsMatcher instanceRespTimeMatcher = new AtLeastOneOfMetricsMatcher();
MetricsValueMatcher greaterThanZero = new MetricsValueMatcher();
greaterThanZero.setValue("gt 0");
instanceRespTimeMatcher.setValue(greaterThanZero);
try {
instanceRespTimeMatcher.verify(metrics);
matched = true;
instanceRespTimeMatcher.verify(endpointMetrics);
valid = true;
} catch (Throwable ignored) {
generateTraffic();
Thread.sleep(retryInterval);
}
LOGGER.info("metrics: {}", metrics);
LOGGER.info("{}: {}", metricName, endpointMetrics);
}
}
}
Expand All @@ -294,14 +285,14 @@ private void verifyServiceMetrics(Service service, final LocalDateTime minutesAg
for (String metricName : ALL_SERVICE_METRICS) {
LOGGER.info("verifying service {}, metrics: {}", service, metricName);

boolean matched = false;
while (!matched) {
boolean valid = false;
while (!valid) {
Metrics serviceMetrics = queryClient.metrics(
new MetricsQuery()
.stepByMinute()
.metricsName(metricName)
.start(minutesAgo)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
.end(LocalDateTime.now(ZoneOffset.UTC))
.id(service.getKey())
);
AtLeastOneOfMetricsMatcher instanceRespTimeMatcher = new AtLeastOneOfMetricsMatcher();
Expand All @@ -310,33 +301,28 @@ private void verifyServiceMetrics(Service service, final LocalDateTime minutesAg
instanceRespTimeMatcher.setValue(greaterThanZero);
try {
instanceRespTimeMatcher.verify(serviceMetrics);
matched = true;
valid = true;
} catch (Throwable ignored) {
generateTraffic();
Thread.sleep(retryInterval);
}
LOGGER.info("serviceMetrics: {}", serviceMetrics);
LOGGER.info("{}: {}", metricName, serviceMetrics);
}
}
}

private void verifyTraces(LocalDateTime minutesAgo) throws Exception {
List<Trace> traces = queryClient.traces(
new TracesQuery()
.stepBySecond()
.start(minutesAgo)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
.orderByStartTime()
);
final TracesQuery query = new TracesQuery()
.stepBySecond()
.start(minutesAgo)
.orderByStartTime();

List<Trace> traces = queryClient.traces(query.end(LocalDateTime.now(ZoneOffset.UTC)));
while (traces.isEmpty()) {
LOGGER.warn("traces is empty, will retry to query");
traces = queryClient.traces(
new TracesQuery()
.stepBySecond()
.start(minutesAgo)
.end(LocalDateTime.now(ZoneOffset.UTC).plusMinutes(1))
.orderByStartTime()
);
LOGGER.warn("traces is empty, will generate traffic data and retry");
generateTraffic();
Thread.sleep(retryInterval);
traces = queryClient.traces(query.end(LocalDateTime.now(ZoneOffset.UTC)));
}

InputStream expectedInputStream =
Expand All @@ -345,4 +331,16 @@ private void verifyTraces(LocalDateTime minutesAgo) throws Exception {
final TracesMatcher tracesMatcher = yaml.loadAs(expectedInputStream, TracesMatcher.class);
tracesMatcher.verifyLoosely(traces);
}

/**
 * Sends a single request to the instrumented service.
 *
 * <p>Posts a user named "SkyWalking" to the {@code /e2e/users} endpoint, logs the
 * response status and body, and asserts that the response status is
 * {@code HttpStatus.OK}. The verification loops call this before retrying a query
 * so that traffic data is sent again.
 */
private void generateTraffic() {
    final String usersUrl = instrumentedServiceUrl + "/e2e/users";

    final Map<String, String> payload = new HashMap<>();
    payload.put("name", "SkyWalking");

    final ResponseEntity<String> response = restTemplate.postForEntity(usersUrl, payload, String.class);

    LOGGER.info("responseEntity: {}, {}", response.getStatusCode(), response.getBody());
    // Fail fast when the instrumented service did not accept the request.
    assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
}
}

0 comments on commit 758b085

Please sign in to comment.