-
Notifications
You must be signed in to change notification settings - Fork 768
Broken tests from closing meter providers on core unload and reload #3707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -130,11 +130,11 @@ grant { | |
permission javax.management.MBeanServerPermission "findMBeanServer"; | ||
permission javax.management.MBeanServerPermission "releaseMBeanServer"; | ||
permission javax.management.MBeanTrustPermission "register"; | ||
|
||
// needed by crossdc | ||
permission javax.security.auth.AuthPermission "getLoginConfiguration"; | ||
permission javax.security.auth.AuthPermission "setLoginConfiguration"; | ||
|
||
// needed by benchmark | ||
permission java.security.SecurityPermission "insertProvider"; | ||
|
||
|
@@ -206,7 +206,7 @@ grant { | |
|
||
// additional permissions based on system properties set by /bin/solr | ||
// NOTE: if the property is not set, the permission entry is ignored. | ||
grant { | ||
grant { | ||
permission java.io.FilePermission "${solr.jetty.keystore}", "read,write,delete,readlink"; | ||
permission java.io.FilePermission "${solr.jetty.keystore}${/}-", "read,write,delete,readlink"; | ||
|
||
|
@@ -277,3 +277,7 @@ grant { | |
// Allow testing effects of customized or bug-fixed dependencies locally (also need to add mavenLocal() to build) | ||
permission java.io.FilePermission "${user.home}${/}.m2${/}repository${/}-", "read"; | ||
}; | ||
|
||
grant { | ||
permission jdk.jfr.FlightRecorderPermission "accessFlightRecorder"; | ||
}; | ||
Comment on lines
+281
to
+283
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added this because of JFR for JVM metrics from the OTEL library |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,10 +16,9 @@ | |
*/ | ||
package org.apache.solr.metrics; | ||
|
||
import static org.apache.solr.metrics.SolrMetricProducer.HANDLER_ATTR; | ||
|
||
import com.codahale.metrics.MetricRegistry; | ||
import io.opentelemetry.api.common.AttributeKey; | ||
import io.opentelemetry.api.common.Attributes; | ||
import java.io.Closeable; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
|
@@ -57,7 +56,7 @@ public class SolrCoreMetricManager implements Closeable { | |
// rename | ||
private final List<MetricProducerInfo> registeredProducers = new ArrayList<>(); | ||
|
||
private record MetricProducerInfo(SolrMetricProducer producer, String scope) {} | ||
private record MetricProducerInfo(SolrMetricProducer producer, Attributes attributes) {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did a little bit of cleanup on this because of this weird scope string. |
||
|
||
/** | ||
* Constructs a metric manager. | ||
|
@@ -132,11 +131,10 @@ public void reregisterCoreMetrics() { | |
|
||
registeredProducers.forEach( | ||
metricProducer -> { | ||
var producerAttributes = core.getCoreAttributes().toBuilder(); | ||
if (metricProducer.scope().startsWith("/")) | ||
producerAttributes.put(HANDLER_ATTR, metricProducer.scope); | ||
metricProducer.producer.initializeMetrics( | ||
solrMetricsContext, producerAttributes.build(), metricProducer.scope); | ||
solrMetricsContext, | ||
metricProducer.attributes.toBuilder().putAll(core.getCoreAttributes()).build(), | ||
""); | ||
}); | ||
} | ||
|
||
|
@@ -145,29 +143,28 @@ public void reregisterCoreMetrics() { | |
* set of attributes for core level metrics. All metric producers are tracked for re-registering | ||
* in the case of core swapping/renaming | ||
* | ||
* @param scope the scope of the metrics to be registered (e.g. `/admin/ping`) | ||
* @param producer producer of metrics to be registered | ||
* @param attributes | ||
*/ | ||
public void registerMetricProducer(String scope, SolrMetricProducer producer) { | ||
if (scope == null || producer == null) { | ||
public void registerMetricProducer(SolrMetricProducer producer, Attributes attributes) { | ||
if (attributes == null || producer == null) { | ||
throw new IllegalArgumentException( | ||
"registerMetricProducer() called with illegal arguments: " | ||
+ "scope = " | ||
+ scope | ||
+ "attributes = " | ||
+ attributes | ||
+ ", producer = " | ||
+ producer); | ||
} | ||
|
||
// Track this producer for potential re-initialization during core rename | ||
registeredProducers.add(new MetricProducerInfo(producer, scope)); | ||
registeredProducers.add(new MetricProducerInfo(producer, attributes)); | ||
|
||
// TODO: We initialize metrics with attributes of the core. This happens again in | ||
// reregisterCoreMetrics | ||
// There is some possible improvement that can be done here to not have to duplicate code in | ||
// reregisterCoreMetrics | ||
var attributesBuilder = core.getCoreAttributes().toBuilder(); | ||
if (scope.startsWith("/")) attributesBuilder.put(HANDLER_ATTR, scope); | ||
producer.initializeMetrics(solrMetricsContext, attributesBuilder.build(), scope); | ||
producer.initializeMetrics( | ||
solrMetricsContext, attributes.toBuilder().putAll(core.getCoreAttributes()).build(), ""); | ||
} | ||
|
||
/** Return the registry used by this SolrCore. */ | ||
|
@@ -190,9 +187,6 @@ public void close() throws IOException { | |
} | ||
metricManager.unregisterGauges( | ||
solrMetricsContext.getRegistryName(), solrMetricsContext.getTag()); | ||
|
||
metricManager.removeRegistry(solrMetricsContext.getRegistryName()); | ||
registeredProducers.clear(); | ||
Comment on lines
-194
to
-195
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I think I found the culprit for reload and metrics not working correctly. I had added to delete the registry here for OTEL metrics but Dropwizard didn't delete the registry but instead just unregistered gauges which isn't all metrics. So I removed this and tests all pass like normal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you saying that a core's metrics are mostly still accessible, even after a core is deleted? Technically you didn't, this code is in the context of "close"... but I'd hope that a high level command to remove/delete a core would do similarly for the underlying metrics. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Metrics are not accessible on core delete or unload. Those metrics are completely gone and the registry goes with it. This change was for the reload scenario. Turns out, it didn't reset normal counters and meters in Dropwizard and never actually deletes the registry. Only the gauges it seems. |
||
} | ||
|
||
public SolrMetricsContext getSolrMetricsContext() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,7 +98,6 @@ | |
import org.apache.solr.logging.MDCLoggingContext; | ||
import org.apache.solr.metrics.otel.FilterablePrometheusMetricReader; | ||
import org.apache.solr.metrics.otel.MetricExporterFactory; | ||
import org.apache.solr.metrics.otel.NoopMetricExporter; | ||
import org.apache.solr.metrics.otel.OtelUnit; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
@@ -889,12 +888,11 @@ private static MetricRegistry getOrCreateRegistry( | |
*/ | ||
// NOCOMMIT: Remove this | ||
public void removeRegistry(String registry) { | ||
meterProviderAndReaders.computeIfPresent( | ||
enforcePrefix(registry), | ||
(key, meterAndReader) -> { | ||
IOUtils.closeQuietly(meterAndReader.sdkMeterProvider()); | ||
return null; | ||
}); | ||
String key = enforcePrefix(registry); | ||
MeterProviderAndReaders mpr = meterProviderAndReaders.remove(key); | ||
if (mpr != null) { | ||
IOUtils.closeQuietly(mpr.sdkMeterProvider()); | ||
} | ||
Comment on lines
+891
to
+895
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This here was difficult to track down. Tests were running into a dead lock because of this. What I had found is that when cores were being closed the Meter providers were going with it. The periodic metric reader was being registered even when the exporter was a Noop. When the periodic metric reader is closed, it triggers one last metric collection for the exporter calling the observable instruments callbacks holding the lock for that concurrent hashmaps bucket. One of the callbacks was getting index size in SolrCore which had a searcher lock. At the same time, the Solr core closing looked to hold that same searcher lock creating a deadlock. Basically from what I can tell: Thread A: holds ConcurrentHashMaps bucket lock → waiting for searcherLock (inside the callback). Deadlock! So changed this to just close the provider without the computeIfPresent. |
||
} | ||
|
||
/** Close all meter providers and their associated metric readers. */ | ||
|
@@ -1769,7 +1767,7 @@ public MetricExporter getMetricExporter() { | |
} | ||
|
||
private MetricExporter loadMetricExporter(SolrResourceLoader loader) { | ||
if (!OTLP_EXPORTER_ENABLED) return new NoopMetricExporter(); | ||
if (!OTLP_EXPORTER_ENABLED) return null; | ||
try { | ||
MetricExporterFactory exporterFactory = | ||
loader.newInstance( | ||
|
@@ -1778,7 +1776,7 @@ private MetricExporter loadMetricExporter(SolrResourceLoader loader) { | |
} catch (SolrException e) { | ||
log.error( | ||
"Could not load OTLP exporter. Check that the Open Telemetry module is enabled.", e); | ||
return new NoopMetricExporter(); | ||
return null; | ||
} | ||
} | ||
|
||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Accidental? But it's okay
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not accident. Needed it for
JvmMetricsTest
.