Permalink
Browse files

Added EdgeNGram, PorterStem and Shingle token filter builders

  • Loading branch information...
devoyster committed May 18, 2012
1 parent 49c3ede commit c000e9a906d51c1970070c00ade484e058160b9e
Showing with 427 additions and 4 deletions.
  1. +43 −0 ...ic.Net.Tests/Builders/Analysis/TokenFilters/EdgeNGram/When_complete_EdgeNGramTokenFilter_built.cs
  2. +18 −0 ...astic.Net.Tests/Builders/Analysis/TokenFilters/EdgeNGram/When_empty_EdgeNGramTokenFilter_built.cs
  3. +31 −0 ....Net.Tests/Builders/Analysis/TokenFilters/PorterStem/When_complete_PorterStemTokenFilter_built.cs
  4. +18 −0 ...tic.Net.Tests/Builders/Analysis/TokenFilters/PorterStem/When_empty_PorterStemTokenFilter_built.cs
  5. +39 −0 ...lastic.Net.Tests/Builders/Analysis/TokenFilters/Shingle/When_complete_ShingleTokenFilter_built.cs
  6. +18 −0 ...inElastic.Net.Tests/Builders/Analysis/TokenFilters/Shingle/When_empty_ShingleTokenFilter_built.cs
  7. +24 −0 src/PlainElastic.Net.Tests/Builders/Analysis/TokenFilters/When_complete_TokenFilterSettings_built.cs
  8. +6 −0 src/PlainElastic.Net.Tests/PlainElastic.Net.Tests.csproj
  9. +16 −0 src/PlainElastic.Net/Builders/IndexSettings/Analysis/TokenFilters/EdgeNGramTokenFilter.cs
  10. +18 −0 src/PlainElastic.Net/Builders/IndexSettings/Analysis/TokenFilters/PorterStemTokenFilter.cs
  11. +38 −0 src/PlainElastic.Net/Builders/IndexSettings/Analysis/TokenFilters/ShingleTokenFilter.cs
  12. +78 −0 src/PlainElastic.Net/Builders/IndexSettings/Analysis/TokenFilters/TokenFilterSettings.cs
  13. +3 −0 src/PlainElastic.Net/PlainElastic.Net.csproj
  14. +4 −4 tools/T4Generators/PlainElastic.T4Generators/Builders/AnalysisComponentCompleteTestTemplate.tt
  15. +23 −0 tools/T4Generators/PlainElastic.T4Generators/Builders/TokenFilters/Metadata/edgeNGram.json
  16. +3 −0 tools/T4Generators/PlainElastic.T4Generators/Builders/TokenFilters/Metadata/porterStem.json
  17. +17 −0 tools/T4Generators/PlainElastic.T4Generators/Builders/TokenFilters/Metadata/shingle.json
  18. +30 −0 tools/T4Generators/PlainElastic.T4Generators/PlainElastic.T4Generators.csproj
@@ -0,0 +1,43 @@
+using Machine.Specifications;
+using PlainElastic.Net.IndexSettings;
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.Tests.Builders.IndexSettings
+{
+ [Subject(typeof(EdgeNGramTokenFilter))]
+ class When_complete_EdgeNGramTokenFilter_built // Spec: an EdgeNGramTokenFilter with every option set renders each option into its JSON fragment.
+ {
+ Because of = () => result = new EdgeNGramTokenFilter() // Build the filter with all options set, then render it via ToString().
+ .Name("name")
+ .Version("3.6")
+ .MinGram(2)
+ .MaxGram(3)
+ .Side(EdgeNGramSide.front)
+ .CustomPart("{ Custom }") // Raw text; the full-result check below expects it verbatim as the last part.
+ .ToString();
+
+ It should_start_with_name = () => result.ShouldStartWith("'name': {".AltQuote()); // NOTE(review): AltQuote presumably swaps single quotes for double quotes so expectations stay readable - confirm in PlainElastic.Net.Utils.
+
+ It should_contain_type_part = () => result.ShouldContain("'type': 'edgeNGram'".AltQuote());
+
+ It should_contain_version_part = () => result.ShouldContain("'version': '3.6'".AltQuote());
+
+ It should_contain_min_gram_part = () => result.ShouldContain("'min_gram': 2".AltQuote());
+
+ It should_contain_max_gram_part = () => result.ShouldContain("'max_gram': 3".AltQuote());
+
+ It should_contain_side_part = () => result.ShouldContain("'side': 'front'".AltQuote());
+
+ It should_contain_custom_part = () => result.ShouldContain("{ Custom }".AltQuote());
+
+ It should_return_correct_result = () => result.ShouldEqual(("'name': { " + // Pins the exact output: type first, then parts in the order the builder methods were called.
+ "'type': 'edgeNGram'," +
+ "'version': '3.6'," +
+ "'min_gram': 2," +
+ "'max_gram': 3," +
+ "'side': 'front'," +
+ "{ Custom } }").AltQuote());
+
+ private static string result; // Rendered fragment assigned in Because and read by every It.
+ }
+}
@@ -0,0 +1,18 @@
+using Machine.Specifications;
+using PlainElastic.Net.IndexSettings;
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.Tests.Builders.IndexSettings
+{
+ [Subject(typeof(EdgeNGramTokenFilter))]
+ class When_empty_EdgeNGramTokenFilter_built // Spec: with only a name set, the filter renders just its type.
+ {
+ Because of = () => result = new EdgeNGramTokenFilter() // Build with a name and no other options, then render via ToString().
+ .Name("name")
+ .ToString();
+
+ It should_return_correct_result = () => result.ShouldEqual("'name': { 'type': 'edgeNGram' }".AltQuote()); // Only the type part is expected inside the named object.
+
+ private static string result; // Rendered fragment assigned in Because.
+ }
+}
@@ -0,0 +1,31 @@
+using Machine.Specifications;
+using PlainElastic.Net.IndexSettings;
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.Tests.Builders.IndexSettings
+{
+ [Subject(typeof(PorterStemTokenFilter))]
+ class When_complete_PorterStemTokenFilter_built // Spec: a PorterStemTokenFilter with every option set renders each option into its JSON fragment.
+ {
+ Because of = () => result = new PorterStemTokenFilter() // Build the filter with all options set, then render it via ToString().
+ .Name("name")
+ .Version("3.6")
+ .CustomPart("{ Custom }") // Raw text; the full-result check below expects it verbatim as the last part.
+ .ToString();
+
+ It should_start_with_name = () => result.ShouldStartWith("'name': {".AltQuote()); // NOTE(review): AltQuote presumably swaps single quotes for double quotes - confirm in PlainElastic.Net.Utils.
+
+ It should_contain_type_part = () => result.ShouldContain("'type': 'porterStem'".AltQuote());
+
+ It should_contain_version_part = () => result.ShouldContain("'version': '3.6'".AltQuote());
+
+ It should_contain_custom_part = () => result.ShouldContain("{ Custom }".AltQuote());
+
+ It should_return_correct_result = () => result.ShouldEqual(("'name': { " + // Pins the exact output: type first, then parts in the order the builder methods were called.
+ "'type': 'porterStem'," +
+ "'version': '3.6'," +
+ "{ Custom } }").AltQuote());
+
+ private static string result; // Rendered fragment assigned in Because and read by every It.
+ }
+}
@@ -0,0 +1,18 @@
+using Machine.Specifications;
+using PlainElastic.Net.IndexSettings;
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.Tests.Builders.IndexSettings
+{
+ [Subject(typeof(PorterStemTokenFilter))]
+ class When_empty_PorterStemTokenFilter_built // Spec: with only a name set, the filter renders just its type.
+ {
+ Because of = () => result = new PorterStemTokenFilter() // Build with a name and no other options, then render via ToString().
+ .Name("name")
+ .ToString();
+
+ It should_return_correct_result = () => result.ShouldEqual("'name': { 'type': 'porterStem' }".AltQuote()); // Only the type part is expected inside the named object.
+
+ private static string result; // Rendered fragment assigned in Because.
+ }
+}
@@ -0,0 +1,39 @@
+using Machine.Specifications;
+using PlainElastic.Net.IndexSettings;
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.Tests.Builders.IndexSettings
+{
+ [Subject(typeof(ShingleTokenFilter))]
+ class When_complete_ShingleTokenFilter_built // Spec: a ShingleTokenFilter with every option set renders each option into its JSON fragment.
+ {
+ Because of = () => result = new ShingleTokenFilter() // Build the filter with all options set, then render it via ToString().
+ .Name("name")
+ .Version("3.6")
+ .MaxShingleSize(2)
+ .OutputUnigrams(true)
+ .CustomPart("{ Custom }") // Raw text; the full-result check below expects it verbatim as the last part.
+ .ToString();
+
+ It should_start_with_name = () => result.ShouldStartWith("'name': {".AltQuote()); // NOTE(review): AltQuote presumably swaps single quotes for double quotes - confirm in PlainElastic.Net.Utils.
+
+ It should_contain_type_part = () => result.ShouldContain("'type': 'shingle'".AltQuote());
+
+ It should_contain_version_part = () => result.ShouldContain("'version': '3.6'".AltQuote());
+
+ It should_contain_max_shingle_size_part = () => result.ShouldContain("'max_shingle_size': 2".AltQuote()); // The integer is expected unquoted.
+
+ It should_contain_output_unigrams_part = () => result.ShouldContain("'output_unigrams': true".AltQuote()); // The boolean is expected as a lower-case, unquoted literal.
+
+ It should_contain_custom_part = () => result.ShouldContain("{ Custom }".AltQuote());
+
+ It should_return_correct_result = () => result.ShouldEqual(("'name': { " + // Pins the exact output: type first, then parts in the order the builder methods were called.
+ "'type': 'shingle'," +
+ "'version': '3.6'," +
+ "'max_shingle_size': 2," +
+ "'output_unigrams': true," +
+ "{ Custom } }").AltQuote());
+
+ private static string result; // Rendered fragment assigned in Because and read by every It.
+ }
+}
@@ -0,0 +1,18 @@
+using Machine.Specifications;
+using PlainElastic.Net.IndexSettings;
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.Tests.Builders.IndexSettings
+{
+ [Subject(typeof(ShingleTokenFilter))]
+ class When_empty_ShingleTokenFilter_built // Spec: with only a name set, the filter renders just its type.
+ {
+ Because of = () => result = new ShingleTokenFilter() // Build with a name and no other options, then render via ToString().
+ .Name("name")
+ .ToString();
+
+ It should_return_correct_result = () => result.ShouldEqual("'name': { 'type': 'shingle' }".AltQuote()); // Only the type part is expected inside the named object.
+
+ private static string result; // Rendered fragment assigned in Because.
+ }
+}
@@ -10,12 +10,18 @@ class When_complete_TokenFilterSettings_built
Because of = () => result = new TokenFilterSettings()
.Asciifolding(x => x.CustomPart("Asciifolding"))
.Asciifolding("named_asciifolding")
+ .EdgeNGram(x => x.CustomPart("EdgeNGram"))
+ .EdgeNGram("named_edgeNGram")
.Length(x => x.CustomPart("Length"))
.Length("named_length")
.Lowercase(x => x.CustomPart("Lowercase"))
.Lowercase("named_lowercase")
.NGram(x => x.CustomPart("NGram"))
.NGram("named_nGram")
+ .PorterStem(x => x.CustomPart("PorterStem"))
+ .PorterStem("named_porterStem")
+ .Shingle(x => x.CustomPart("Shingle"))
+ .Shingle("named_shingle")
.Standard(x => x.CustomPart("Standard"))
.Standard("named_standard")
.CustomPart("{ Custom }")
@@ -25,6 +31,10 @@ class When_complete_TokenFilterSettings_built
It should_contain_named_asciifolding_part = () => result.ShouldContain("'named_asciifolding': { 'type': 'asciifolding' }".AltQuote());
+ It should_contain_edgeNGram_part = () => result.ShouldContain("'edgeNGram': { 'type': 'edgeNGram',EdgeNGram }".AltQuote());
+
+ It should_contain_named_edgeNGram_part = () => result.ShouldContain("'named_edgeNGram': { 'type': 'edgeNGram' }".AltQuote());
+
It should_contain_length_part = () => result.ShouldContain("'length': { 'type': 'length',Length }".AltQuote());
It should_contain_named_length_part = () => result.ShouldContain("'named_length': { 'type': 'length' }".AltQuote());
@@ -37,6 +47,14 @@ class When_complete_TokenFilterSettings_built
It should_contain_named_nGram_part = () => result.ShouldContain("'named_nGram': { 'type': 'nGram' }".AltQuote());
+ It should_contain_porterStem_part = () => result.ShouldContain("'porterStem': { 'type': 'porterStem',PorterStem }".AltQuote());
+
+ It should_contain_named_porterStem_part = () => result.ShouldContain("'named_porterStem': { 'type': 'porterStem' }".AltQuote());
+
+ It should_contain_shingle_part = () => result.ShouldContain("'shingle': { 'type': 'shingle',Shingle }".AltQuote());
+
+ It should_contain_named_shingle_part = () => result.ShouldContain("'named_shingle': { 'type': 'shingle' }".AltQuote());
+
It should_contain_standard_part = () => result.ShouldContain("'standard': { 'type': 'standard',Standard }".AltQuote());
It should_contain_named_standard_part = () => result.ShouldContain("'named_standard': { 'type': 'standard' }".AltQuote());
@@ -46,12 +64,18 @@ class When_complete_TokenFilterSettings_built
It should_return_correct_result = () => result.ShouldEqual(("'token_filter': { " +
"'asciifolding': { 'type': 'asciifolding',Asciifolding }," +
"'named_asciifolding': { 'type': 'asciifolding' }," +
+ "'edgeNGram': { 'type': 'edgeNGram',EdgeNGram }," +
+ "'named_edgeNGram': { 'type': 'edgeNGram' }," +
"'length': { 'type': 'length',Length }," +
"'named_length': { 'type': 'length' }," +
"'lowercase': { 'type': 'lowercase',Lowercase }," +
"'named_lowercase': { 'type': 'lowercase' }," +
"'nGram': { 'type': 'nGram',NGram }," +
"'named_nGram': { 'type': 'nGram' }," +
+ "'porterStem': { 'type': 'porterStem',PorterStem }," +
+ "'named_porterStem': { 'type': 'porterStem' }," +
+ "'shingle': { 'type': 'shingle',Shingle }," +
+ "'named_shingle': { 'type': 'shingle' }," +
"'standard': { 'type': 'standard',Standard }," +
"'named_standard': { 'type': 'standard' }," +
"{ Custom } }").AltQuote());
@@ -67,12 +67,18 @@
<Compile Include="Builders\Analysis\Analyzers\Whitespace\When_empty_WhitespaceAnalyzer_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Asciifolding\When_complete_AsciifoldingTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Asciifolding\When_empty_AsciifoldingTokenFilter_built.cs" />
+ <Compile Include="Builders\Analysis\TokenFilters\EdgeNGram\When_complete_EdgeNGramTokenFilter_built.cs" />
+ <Compile Include="Builders\Analysis\TokenFilters\EdgeNGram\When_empty_EdgeNGramTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Length\When_complete_LengthTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Length\When_empty_LengthTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Lowercase\When_complete_LowercaseTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Lowercase\When_empty_LowercaseTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\NGram\When_complete_NGramTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\NGram\When_empty_NGramTokenFilter_built.cs" />
+ <Compile Include="Builders\Analysis\TokenFilters\PorterStem\When_complete_PorterStemTokenFilter_built.cs" />
+ <Compile Include="Builders\Analysis\TokenFilters\PorterStem\When_empty_PorterStemTokenFilter_built.cs" />
+ <Compile Include="Builders\Analysis\TokenFilters\Shingle\When_complete_ShingleTokenFilter_built.cs" />
+ <Compile Include="Builders\Analysis\TokenFilters\Shingle\When_empty_ShingleTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Standard\When_complete_StandardTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\Standard\When_empty_StandardTokenFilter_built.cs" />
<Compile Include="Builders\Analysis\TokenFilters\When_complete_TokenFilterSettings_built.cs" />
@@ -0,0 +1,16 @@
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.IndexSettings
+{
+ /// <summary>
+ /// A token filter of type edgeNGram that builds N-character substrings from text, starting from one side (edge) of the text.
+ /// see http://www.elasticsearch.org/guide/reference/index-modules/analysis/edgengram-tokenfilter.html
+ /// </summary>
+ public class EdgeNGramTokenFilter : EdgeNGramComponentBase<EdgeNGramTokenFilter> // Declares no options itself. NOTE(review): the fluent option setters are presumably inherited from EdgeNGramComponentBase - confirm.
+ {
+ protected override string GetComponentType() // Supplies the type identifier of this component, taken from DefaultTokenFilters.edgeNGram.
+ {
+ return DefaultTokenFilters.edgeNGram.AsString();
+ }
+ }
+}
@@ -0,0 +1,18 @@
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.IndexSettings
+{
+ /// <summary>
+ /// A token filter of type porterStem that transforms the token stream as per the Porter stemming algorithm.
+ /// Note, the input to the stemming filter must already be in lower case, so a Lower Case Token Filter must be placed
+ /// before this filter in the filter chain, or a Lower Case Tokenizer must be used, in order for this to work properly!
+ /// see http://www.elasticsearch.org/guide/reference/index-modules/analysis/porterstem-tokenfilter.html
+ /// </summary>
+ public class PorterStemTokenFilter : NamedComponentBase<PorterStemTokenFilter> // Declares no filter-specific options of its own.
+ {
+ protected override string GetComponentType() // Supplies the type identifier of this component, taken from DefaultTokenFilters.porterStem.
+ {
+ return DefaultTokenFilters.porterStem.AsString();
+ }
+ }
+}
@@ -0,0 +1,38 @@
+using PlainElastic.Net.Utils;
+
+namespace PlainElastic.Net.IndexSettings
+{
+ /// <summary>
+ /// A token filter of type shingle that constructs shingles (token n-grams) from a token stream.
+ /// In other words, it combines adjacent tokens into a single token.
+ /// see http://www.elasticsearch.org/guide/reference/index-modules/analysis/shingle-tokenfilter.html
+ /// </summary>
+ public class ShingleTokenFilter : NamedComponentBase<ShingleTokenFilter>
+ {
+ protected override string GetComponentType() // Supplies the type identifier of this component, taken from DefaultTokenFilters.shingle.
+ {
+ return DefaultTokenFilters.shingle.AsString();
+ }
+
+
+ /// <summary>
+ /// Sets the maximum shingle size; registered as the max_shingle_size setting.
+ /// Defaults to 2. Returns this instance so that calls can be chained.
+ /// </summary>
+ public ShingleTokenFilter MaxShingleSize(int maxShingleSize = 2)
+ {
+ RegisterJsonPart("'max_shingle_size': {0}", maxShingleSize.AsString()); // The value is substituted without quotes.
+ return this;
+ }
+
+ /// <summary>
+ /// Sets the flag indicating whether unigrams should be sent to output; registered as the output_unigrams setting.
+ /// Defaults to true. Returns this instance so that calls can be chained.
+ /// </summary>
+ public ShingleTokenFilter OutputUnigrams(bool outputUnigrams = true)
+ {
+ RegisterJsonPart("'output_unigrams': {0}", outputUnigrams.AsString()); // The value is substituted without quotes.
+ return this;
+ }
+ }
+}
Oops, something went wrong.

0 comments on commit c000e9a

Please sign in to comment.