New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable analytics geoip in behavioral analytics. #96624
Changes from 24 commits
823e276
4bf04ae
6f0767f
97009d6
b86bbb9
7ee7a2e
c8158fc
051d5d4
c257f07
9a3a079
abb80c0
f44caf6
728c18f
199ccd2
e4db7aa
ca4e909
62a6ab8
ef123f7
c3baf3d
6489976
6ade089
90e65fa
9dc3f81
31082be
22aa720
022a6ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 96624 | ||
summary: Enable analytics geoip in behavioral analytics | ||
area: "Application" | ||
type: feature | ||
issues: [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
import org.elasticsearch.core.TimeValue; | ||
import org.elasticsearch.gateway.GatewayService; | ||
import org.elasticsearch.index.Index; | ||
import org.elasticsearch.index.IndexSettings; | ||
import org.elasticsearch.ingest.IngestMetadata; | ||
import org.elasticsearch.ingest.IngestService; | ||
import org.elasticsearch.ingest.Pipeline; | ||
|
@@ -45,9 +46,11 @@ | |
import java.util.Objects; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
import java.util.concurrent.atomic.AtomicReference; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX; | ||
import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; | ||
import static org.elasticsearch.ingest.geoip.GeoIpProcessor.Factory.downloadDatabaseOnPipelineCreation; | ||
|
||
/** | ||
* Persistent task executor that is responsible for starting {@link GeoIpDownloader} after task is allocated by master node. | ||
|
@@ -207,7 +210,14 @@ public void clusterChanged(ClusterChangedEvent event) { | |
} | ||
} | ||
|
||
if (event.metadataChanged() && event.changedCustomMetadataSet().contains(IngestMetadata.TYPE)) { | ||
if (event.metadataChanged() == false) { | ||
return; | ||
} | ||
|
||
boolean hasIndicesChanges = event.previousState().metadata().indices().equals(event.state().metadata().indices()) == false; | ||
boolean hasIngestPipelineChanges = event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); | ||
|
||
if (hasIngestPipelineChanges || hasIndicesChanges) { | ||
boolean newAtLeastOneGeoipProcessor = hasAtLeastOneGeoipProcessor(event.state()); | ||
if (newAtLeastOneGeoipProcessor && atLeastOneGeoipProcessor == false) { | ||
atLeastOneGeoipProcessor = true; | ||
|
@@ -222,41 +232,112 @@ public void clusterChanged(ClusterChangedEvent event) { | |
} | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
static boolean hasAtLeastOneGeoipProcessor(ClusterState clusterState) { | ||
List<PipelineConfiguration> pipelineDefinitions = IngestService.getPipelines(clusterState); | ||
return pipelineDefinitions.stream().anyMatch(pipelineDefinition -> { | ||
Map<String, Object> pipelineMap = pipelineDefinition.getConfigAsMap(); | ||
return hasAtLeastOneGeoipProcessor((List<Map<String, Object>>) pipelineMap.get(Pipeline.PROCESSORS_KEY)); | ||
if (pipelineConfigurationsWithGeoIpProcessor(clusterState, true).isEmpty() == false) { | ||
return true; | ||
} | ||
|
||
List<String> checkReferencedPipelines = pipelineConfigurationsWithGeoIpProcessor(clusterState, false).stream() | ||
.map(PipelineConfiguration::getId) | ||
.collect(Collectors.toList()); | ||
|
||
if (checkReferencedPipelines.isEmpty()) { | ||
return false; | ||
} | ||
|
||
return clusterState.getMetadata().indices().values().stream().anyMatch(indexMetadata -> { | ||
String defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(indexMetadata.getSettings()); | ||
String finalPipeline = IndexSettings.FINAL_PIPELINE.get(indexMetadata.getSettings()); | ||
return checkReferencedPipelines.contains(defaultPipeline) || checkReferencedPipelines.contains(finalPipeline); | ||
}); | ||
} | ||
|
||
private static boolean hasAtLeastOneGeoipProcessor(List<Map<String, Object>> processors) { | ||
return processors != null && processors.stream().anyMatch(GeoIpDownloaderTaskExecutor::hasAtLeastOneGeoipProcessor); | ||
/** | ||
* Retrieve list of pipelines that have at least one geoip processor. | ||
* @param clusterState Cluster state. | ||
* @param downloadDatabaseOnPipelineCreation Filter the list to include only pipeline with the download_database_on_pipeline_creation | ||
* matching the param. | ||
* @return A list of {@link PipelineConfiguration} matching criteria. | ||
*/ | ||
@SuppressWarnings("unchecked") | ||
private static List<PipelineConfiguration> pipelineConfigurationsWithGeoIpProcessor( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to have some javadocs for these methods. Specifically it would be good documenting what the input args are (the code makes sense when you get into it, but it's not intuitive to me what impact downloadDatabaseOnPipelineCreation has on the pipelineConfigurationsWithGeoIpProcessor that are returned from this method from just looking at the method signature).. |
||
ClusterState clusterState, | ||
boolean downloadDatabaseOnPipelineCreation | ||
) { | ||
List<PipelineConfiguration> pipelineDefinitions = IngestService.getPipelines(clusterState); | ||
return pipelineDefinitions.stream().filter(pipelineConfig -> { | ||
List<Map<String, Object>> processors = (List<Map<String, Object>>) pipelineConfig.getConfigAsMap().get(Pipeline.PROCESSORS_KEY); | ||
return hasAtLeastOneGeoipProcessor(processors, downloadDatabaseOnPipelineCreation); | ||
}).collect(Collectors.toList()); | ||
} | ||
|
||
private static boolean hasAtLeastOneGeoipProcessor(Map<String, Object> processor) { | ||
return processor != null | ||
&& (processor.containsKey(GeoIpProcessor.TYPE) | ||
|| isProcessorWithOnFailureGeoIpProcessor(processor) | ||
|| isForeachProcessorWithGeoipProcessor(processor)); | ||
/** | ||
* Check if a list of processor contains at least a geoip processor. | ||
* @param processors List of processors. | ||
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false. | ||
* @return true if a geoip processor is found in the processor list. | ||
*/ | ||
private static boolean hasAtLeastOneGeoipProcessor(List<Map<String, Object>> processors, boolean downloadDatabaseOnPipelineCreation) { | ||
return processors != null && processors.stream().anyMatch(p -> hasAtLeastOneGeoipProcessor(p, downloadDatabaseOnPipelineCreation)); | ||
} | ||
|
||
/** | ||
* Check if a processor config is a geoip processor or contains at least a geoip processor. | ||
* @param processor Processor config. | ||
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false. | ||
* @return true if a geoip processor is found in the processor list. | ||
*/ | ||
@SuppressWarnings("unchecked") | ||
private static boolean hasAtLeastOneGeoipProcessor(Map<String, Object> processor, boolean downloadDatabaseOnPipelineCreation) { | ||
if (processor == null) { | ||
return false; | ||
} | ||
|
||
if (processor.containsKey(GeoIpProcessor.TYPE)) { | ||
Map<String, Object> processorConfig = (Map<String, Object>) processor.get(GeoIpProcessor.TYPE); | ||
return downloadDatabaseOnPipelineCreation(processorConfig) == downloadDatabaseOnPipelineCreation; | ||
} | ||
|
||
return isProcessorWithOnFailureGeoIpProcessor(processor, downloadDatabaseOnPipelineCreation) | ||
|| isForeachProcessorWithGeoipProcessor(processor, downloadDatabaseOnPipelineCreation); | ||
} | ||
|
||
/** | ||
* Check if a processor config is has an on_failure clause containing at least a geoip processor. | ||
* @param processor Processor config. | ||
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false. | ||
* @return true if a geoip processor is found in the processor list. | ||
*/ | ||
@SuppressWarnings("unchecked") | ||
private static boolean isProcessorWithOnFailureGeoIpProcessor(Map<String, Object> processor) { | ||
private static boolean isProcessorWithOnFailureGeoIpProcessor( | ||
Map<String, Object> processor, | ||
boolean downloadDatabaseOnPipelineCreation | ||
) { | ||
return processor != null | ||
&& processor.values() | ||
.stream() | ||
.anyMatch( | ||
value -> value instanceof Map | ||
&& hasAtLeastOneGeoipProcessor(((Map<String, List<Map<String, Object>>>) value).get("on_failure")) | ||
&& hasAtLeastOneGeoipProcessor( | ||
((Map<String, List<Map<String, Object>>>) value).get("on_failure"), | ||
downloadDatabaseOnPipelineCreation | ||
) | ||
); | ||
} | ||
|
||
/** | ||
* Check if a processor is a foreach processor containing at least a geoip processor. | ||
* @param processor Processor config. | ||
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false. | ||
* @return true if a geoip processor is found in the processor list. | ||
*/ | ||
@SuppressWarnings("unchecked") | ||
private static boolean isForeachProcessorWithGeoipProcessor(Map<String, Object> processor) { | ||
private static boolean isForeachProcessorWithGeoipProcessor(Map<String, Object> processor, boolean downloadDatabaseOnPipelineCreation) { | ||
return processor.containsKey("foreach") | ||
&& hasAtLeastOneGeoipProcessor(((Map<String, Map<String, Object>>) processor.get("foreach")).get("processor")); | ||
&& hasAtLeastOneGeoipProcessor( | ||
((Map<String, Map<String, Object>>) processor.get("foreach")).get("processor"), | ||
downloadDatabaseOnPipelineCreation | ||
); | ||
} | ||
|
||
private void startTask(Runnable onFailure) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be a Set since all interactions with it are via
contains
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea. I did pushed an update to use a
Set