-
Notifications
You must be signed in to change notification settings - Fork 2.9k
NIFI-2142 Cache compiled XSLT in TransformXml #609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
2659547
Added Guava cache for XSLT stylesheets in TransformXml
jfrazee 8024699
Fix contrib-check violations
jfrazee 6219e56
Made property docs more clear, changed cache size to Integer
jfrazee 687fe2f
Updated docs for cache TTL, no longer create cache if size is 0
jfrazee File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,10 @@ | |
| import java.util.Set; | ||
| import java.util.concurrent.TimeUnit; | ||
|
|
||
| import javax.xml.transform.OutputKeys; | ||
| import javax.xml.transform.Templates; | ||
| import javax.xml.transform.Transformer; | ||
| import javax.xml.transform.TransformerConfigurationException; | ||
| import javax.xml.transform.TransformerFactory; | ||
| import javax.xml.transform.stream.StreamResult; | ||
| import javax.xml.transform.stream.StreamSource; | ||
|
|
@@ -41,6 +44,7 @@ | |
| import org.apache.nifi.annotation.behavior.SupportsBatching; | ||
| import org.apache.nifi.annotation.documentation.CapabilityDescription; | ||
| import org.apache.nifi.annotation.documentation.Tags; | ||
| import org.apache.nifi.annotation.lifecycle.OnScheduled; | ||
| import org.apache.nifi.components.PropertyDescriptor; | ||
| import org.apache.nifi.components.ValidationContext; | ||
| import org.apache.nifi.components.ValidationResult; | ||
|
|
@@ -60,6 +64,10 @@ | |
| import org.apache.nifi.util.StopWatch; | ||
| import org.apache.nifi.util.Tuple; | ||
|
|
||
| import com.google.common.cache.CacheBuilder; | ||
| import com.google.common.cache.CacheLoader; | ||
| import com.google.common.cache.LoadingCache; | ||
|
|
||
| @EventDriven | ||
| @SideEffectFree | ||
| @SupportsBatching | ||
|
|
@@ -76,25 +84,59 @@ public class TransformXml extends AbstractProcessor { | |
| .name("XSLT file name") | ||
| .description("Provides the name (including full path) of the XSLT file to apply to the flowfile XML content.") | ||
| .required(true) | ||
| .expressionLanguageSupported(true) | ||
| .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR) | ||
| .build(); | ||
|
|
||
| public static final PropertyDescriptor INDENT_OUTPUT = new PropertyDescriptor.Builder() | ||
| .name("indent-output") | ||
| .displayName("Indent") | ||
| .description("Whether or not to indent the output.") | ||
| .required(true) | ||
| .defaultValue("true") | ||
| .allowableValues("true", "false") | ||
| .addValidator(StandardValidators.BOOLEAN_VALIDATOR) | ||
| .build(); | ||
|
|
||
| public static final PropertyDescriptor CACHE_SIZE = new PropertyDescriptor.Builder() | ||
| .name("cache-size") | ||
| .displayName("Cache size") | ||
| .description("Maximum number of stylesheets to cache. Zero disables the cache.") | ||
| .required(true) | ||
| .defaultValue("10") | ||
| .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) | ||
| .build(); | ||
|
|
||
| public static final PropertyDescriptor CACHE_TTL_AFTER_LAST_ACCESS = new PropertyDescriptor.Builder() | ||
| .name("cache-ttl-after-last-access") | ||
| .displayName("Cache TTL after last access") | ||
| .description("The cache TTL (time-to-live) or how long to keep stylesheets in the cache after last access.") | ||
| .required(true) | ||
| .defaultValue("60 secs") | ||
| .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) | ||
| .build(); | ||
|
|
||
| public static final Relationship REL_SUCCESS = new Relationship.Builder() | ||
| .name("success") | ||
| .description("The FlowFile with transformed content will be routed to this relationship") | ||
| .build(); | ||
|
|
||
| public static final Relationship REL_FAILURE = new Relationship.Builder() | ||
| .name("failure") | ||
| .description("If a FlowFile fails processing for any reason (for example, the FlowFile is not valid XML), it will be routed to this relationship") | ||
| .build(); | ||
|
|
||
| private List<PropertyDescriptor> properties; | ||
| private Set<Relationship> relationships; | ||
| private LoadingCache<String, Templates> cache; | ||
|
|
||
| @Override | ||
| protected void init(final ProcessorInitializationContext context) { | ||
| final List<PropertyDescriptor> properties = new ArrayList<>(); | ||
| properties.add(XSLT_FILE_NAME); | ||
| properties.add(INDENT_OUTPUT); | ||
| properties.add(CACHE_SIZE); | ||
| properties.add(CACHE_TTL_AFTER_LAST_ACCESS); | ||
| this.properties = Collections.unmodifiableList(properties); | ||
|
|
||
| final Set<Relationship> relationships = new HashSet<>(); | ||
|
|
@@ -124,6 +166,35 @@ protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String | |
| .build(); | ||
| } | ||
|
|
||
| private Templates newTemplates(String path) throws TransformerConfigurationException { | ||
| TransformerFactory factory = TransformerFactory.newInstance(); | ||
| return factory.newTemplates(new StreamSource(path)); | ||
| } | ||
|
|
||
| @OnScheduled | ||
| public void onScheduled(final ProcessContext context) { | ||
| final ComponentLog logger = getLogger(); | ||
| final Integer cacheSize = context.getProperty(CACHE_SIZE).asInteger(); | ||
| final Long cacheTTL = context.getProperty(CACHE_TTL_AFTER_LAST_ACCESS).asTimePeriod(TimeUnit.SECONDS); | ||
|
|
||
| if (cacheSize > 0) { | ||
| CacheBuilder cacheBuilder = CacheBuilder.newBuilder().maximumSize(cacheSize); | ||
| if (cacheTTL > 0) { | ||
| cacheBuilder = cacheBuilder.expireAfterAccess(cacheTTL, TimeUnit.SECONDS); | ||
| } | ||
|
|
||
| cache = cacheBuilder.build( | ||
| new CacheLoader<String, Templates>() { | ||
| public Templates load(String path) throws TransformerConfigurationException { | ||
| return newTemplates(path); | ||
| } | ||
| }); | ||
| } else { | ||
| cache = null; | ||
| logger.warn("Stylesheet cache disabled because cache size is set to 0"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The cache will still be created, and on line 213 it will try to use it to look up an XSLT even if the cache size is 0. I'd suggest not even creating it in that case. |
||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public void onTrigger(final ProcessContext context, final ProcessSession session) { | ||
| final FlowFile original = session.get(); | ||
|
|
@@ -133,17 +204,25 @@ public void onTrigger(final ProcessContext context, final ProcessSession session | |
|
|
||
| final ComponentLog logger = getLogger(); | ||
| final StopWatch stopWatch = new StopWatch(true); | ||
| final String xsltFileName = context.getProperty(XSLT_FILE_NAME) | ||
| .evaluateAttributeExpressions(original) | ||
| .getValue(); | ||
| final Boolean indentOutput = context.getProperty(INDENT_OUTPUT).asBoolean(); | ||
|
|
||
| try { | ||
| FlowFile transformed = session.write(original, new StreamCallback() { | ||
| @Override | ||
| public void process(final InputStream rawIn, final OutputStream out) throws IOException { | ||
| try (final InputStream in = new BufferedInputStream(rawIn)) { | ||
| final Templates templates; | ||
| if (cache != null) { | ||
| templates = cache.get(xsltFileName); | ||
| } else { | ||
| templates = newTemplates(xsltFileName); | ||
| } | ||
|
|
||
| File stylesheet = new File(context.getProperty(XSLT_FILE_NAME).getValue()); | ||
| StreamSource styleSource = new StreamSource(stylesheet); | ||
| TransformerFactory tfactory = new net.sf.saxon.TransformerFactoryImpl(); | ||
| Transformer transformer = tfactory.newTransformer(styleSource); | ||
| final Transformer transformer = templates.newTransformer(); | ||
| transformer.setOutputProperty(OutputKeys.INDENT, (indentOutput ? "yes" : "no")); | ||
|
|
||
| // pass all dynamic properties to the transformer | ||
| for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) { | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 7 additions & 7 deletions
14
...fi-standard-bundle/nifi-standard-processors/src/test/resources/TestTransformXml/math.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,8 @@ | ||
| <HTML xmlns:xs="http://www.w3.org/2001/XMLSchema"> | ||
| <H1>Test for mod</H1> | ||
| <HR> | ||
| <P>Should say "1": 1</P> | ||
| <P>Should say "1": 1</P> | ||
| <P>Should say "-1": -1</P> | ||
| <P>true</P> | ||
| </HTML> | ||
| <H1>Test for mod</H1> | ||
| <HR> | ||
| <P>Should say "1": 1</P> | ||
| <P>Should say "1": 1</P> | ||
| <P>Should say "-1": -1</P> | ||
| <P>true</P> | ||
| </HTML> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 14 additions & 14 deletions
28
...i-standard-bundle/nifi-standard-processors/src/test/resources/TestTransformXml/tokens.xml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,17 @@ | ||
| <?xml version="1.0" encoding="ISO-8859-1"?> | ||
| <test release="0.0" id="uuid_0"> | ||
| <event id="uuid_1"> | ||
| <token>1</token> | ||
| <token>2</token> | ||
| <token>3</token> | ||
| <token>4</token> | ||
| <token>C:\dir$abc</token> | ||
| <token>6</token> | ||
| <token>7</token> | ||
| <token>A,B</token> | ||
| <token>"don't"</token> | ||
| <token>2014-05-01T30:23:00Z</token> | ||
| <token>11</token> | ||
| <token>12</token> | ||
| </event> | ||
| <event id="uuid_1"> | ||
| <token>1</token> | ||
| <token>2</token> | ||
| <token>3</token> | ||
| <token>4</token> | ||
| <token>C:\dir$abc</token> | ||
| <token>6</token> | ||
| <token>7</token> | ||
| <token>A,B</token> | ||
| <token>"don't"</token> | ||
| <token>2014-05-01T30:23:00Z</token> | ||
| <token>11</token> | ||
| <token>12</token> | ||
| </event> | ||
| </test> |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this property a max character length or data size limit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, this is an integer validator, but the property is interpreted as a long on line 171.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, my new guess is that this is the maximum number of stylesheets stored (the description could be expanded, lol)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@JPercivall Yes, that's correct. So "Maximum number of stylesheets to store in the cache"?