Skip to content

Commit

Permalink
Deprecate use of htmlStrip as name for HtmlStripCharFilter (#27429)
Browse files Browse the repository at this point in the history
The camel case name `htmlStrip` should be removed in favour of `html_strip`, but
we need to deprecate it first. This change deprecates the name for indices
created on or after version 6.3.0 and logs a deprecation warning in these cases.
  • Loading branch information
Christoph Büscher committed Apr 19, 2018
1 parent 9ef3a73 commit 3f5167f
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 2 deletions.
Expand Up @@ -67,6 +67,8 @@
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.tr.ApostropheFilter;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
Expand All @@ -88,6 +90,9 @@
import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;

public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {

// Logger for deprecation warnings raised by this plugin, e.g. when the deprecated "htmlStrip" char filter name is used.
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));

@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
Expand Down Expand Up @@ -171,8 +176,14 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
// Collects the pre-configured char filters contributed by this plugin; every entry below wraps HTMLStripCharFilter.
List<PreConfiguredCharFilter> filters = new ArrayList<>();
// Preferred snake_case name.
filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
// TODO deprecate htmlStrip
filters.add(PreConfiguredCharFilter.singleton("htmlStrip", false, HTMLStripCharFilter::new));
// NOTE(review): "htmlStrip" is registered both directly above and below. This text looks like a flattened diff,
// so the plain registration above is presumably the line being replaced by the version-aware one - confirm.
filters.add(PreConfiguredCharFilter.singletonWithVersion("htmlStrip", false, (reader, version) -> {
// Only a version on or after 6.3.0 triggers the deprecation message; earlier versions get the filter silently.
if (version.onOrAfter(org.elasticsearch.Version.V_6_3_0)) {
// NOTE(review): the message spells the name "[htmpStrip]", which looks like a typo for "[htmlStrip]".
// Tests assert this exact text, so any correction has to change the message and the tests together.
DEPRECATION_LOGGER.deprecatedAndMaybeLog("htmlStrip_deprecation",
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead.");
}
// The filter itself is the same regardless of version.
return new HTMLStripCharFilter(reader);
}));
return filters;
}

Expand Down
@@ -0,0 +1,73 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;

import java.io.IOException;
import java.io.StringReader;
import java.util.Map;


public class HtmlStripCharFilterFactoryTests extends ESTestCase {

/**
* Check that the deprecated name "htmlStrip" issues a deprecation warning for indices created since 6.3.0
*/
public void testDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_3_0, Version.CURRENT))
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
assertNotNull(charFilterFactory.create(new StringReader("input")));
assertWarnings("The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead.");
}
}

/**
* Check that the deprecated name "htmlStrip" does NOT issues a deprecation warning for indices created before 6.3.0
*/
public void testNoDeprecationWarningPre6_3() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_2_4))
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
assertNotNull(charFilterFactory.create(new StringReader("")));
}
}
}
Expand Up @@ -17,3 +17,56 @@
- match: { error.type: illegal_argument_exception }
- match: { error.reason: "Custom normalizer may not use filter [word_delimiter]" }

---
# Checks that using the deprecated char filter name "htmlStrip" returns a deprecation warning.
"htmlStrip_deprecated":
# Skipped for versions up to 6.2.99 (the name is deprecated in 6.3); the runner must support "warnings".
- skip:
version: " - 6.2.99"
reason: deprecated in 6.3
features: "warnings"

# Create an index whose custom analyzer uses the deprecated "htmlStrip" char filter name.
- do:
indices.create:
index: test_deprecated_htmlstrip
body:
settings:
index:
analysis:
analyzer:
my_htmlStripWithCharfilter:
tokenizer: keyword
char_filter: ["htmlStrip"]
mappings:
type:
properties:
name:
type: text
analyzer: my_htmlStripWithCharfilter

# Indexing a document through the analyzer is expected to return the deprecation warning.
# NOTE(review): the expected text spells the name "[htmpStrip]"; it must match the emitted message exactly.
- do:
warnings:
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
index:
index: test_deprecated_htmlstrip
type: type
id: 1
body: { "name": "foo bar" }

# A second indexing request is expected to return the same warning again.
- do:
warnings:
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
index:
index: test_deprecated_htmlstrip
type: type
id: 2
body: { "name": "foo baz" }

# The analyze API is also expected to return the warning; the HTML tags are stripped, leaving one token.
- do:
warnings:
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
indices.analyze:
index: test_deprecated_htmlstrip
body:
analyzer: "my_htmlStripWithCharfilter"
text: "<html>foo</html>"
- length: { tokens: 1 }
- match: { tokens.0.token: "\nfoo\n" }
Expand Up @@ -40,6 +40,15 @@ public static PreConfiguredCharFilter singleton(String name, boolean useFilterFo
(reader, version) -> create.apply(reader));
}

/**
 * Create a pre-configured char filter that may not vary at all, while giving the factory function
 * access to the Elasticsearch version.
 *
 * @param name name under which the char filter is registered
 * @param useFilterForMultitermQueries passed through unchanged to the constructor
 * @param create builds the filtered reader from the input reader and the version
 * @return the new pre-configured char filter, created with {@code CachingStrategy.ONE}
 */
public static PreConfiguredCharFilter singletonWithVersion(
        String name,
        boolean useFilterForMultitermQueries,
        BiFunction<Reader, org.elasticsearch.Version, Reader> create) {
    return new PreConfiguredCharFilter(
        name,
        CachingStrategy.ONE,
        useFilterForMultitermQueries,
        (input, esVersion) -> create.apply(input, esVersion));
}

/**
* Create a pre-configured token filter that may vary based on the Lucene version.
*/
Expand Down

0 comments on commit 3f5167f

Please sign in to comment.