/
TokenizerChain.java
90 lines (81 loc) · 2.93 KB
/
TokenizerChain.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
* Hibernate, Relational Persistence for Idiomatic Java
*
* Copyright (c) 2014, Red Hat, Inc. and/or its affiliates or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors. All third-party contributions are
* distributed under license by Red Hat, Inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.hibernate.search.impl;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
/**
 * An {@link Analyzer} assembled from factories: zero or more char filters, exactly one
 * tokenizer, and zero or more token filters, each applied in the order given.
 * <p>
 * Inspired by Apache Solr's {@code org.apache.solr.analysis.TokenizerChain}.
 */
public final class TokenizerChain extends Analyzer {

	private final CharFilterFactory[] charFilters;
	private final TokenizerFactory tokenizer;
	private final TokenFilterFactory[] filters;

	/**
	 * Builds a chain from the given factories.
	 *
	 * @param charFilters factories used to wrap the input reader, in array order;
	 * {@code null} is treated as an empty array
	 * @param tokenizer factory used to create the tokenizer; it is dereferenced
	 * by {@link #createComponents(String, Reader)}
	 * @param filters factories used to wrap the tokenizer output, in array order;
	 * {@code null} is treated as an empty array
	 */
	public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
		// Copy the arrays so that later changes made by the caller to its own arrays
		// cannot alter the chain held by this instance.
		this.charFilters = charFilters != null ? charFilters.clone() : new CharFilterFactory[0];
		this.tokenizer = tokenizer;
		this.filters = filters != null ? filters.clone() : new TokenFilterFactory[0];
	}

	/**
	 * Wraps the reader with every configured char filter, in order.
	 *
	 * @param fieldName the field name; not used by this implementation
	 * @param reader the reader to wrap
	 * @return the outermost wrapped reader, or {@code reader} itself when no char filter is configured
	 */
	@Override
	public Reader initReader(final String fieldName, final Reader reader) {
		Reader wrapped = reader;
		// With no char filters the loop body never runs and the reader is returned untouched.
		for ( CharFilterFactory charFilter : charFilters ) {
			wrapped = charFilter.create( wrapped );
		}
		return wrapped;
	}

	/**
	 * Creates the tokenizer over the given reader and wraps it with every configured
	 * token filter, in order.
	 *
	 * @param fieldName the field name; not used by this implementation
	 * @param aReader the reader handed to the tokenizer factory
	 * @return components pairing the created tokenizer with the outermost token stream
	 */
	@Override
	protected TokenStreamComponents createComponents(final String fieldName, final Reader aReader) {
		final Tokenizer source = tokenizer.create( aReader );
		TokenStream stream = source;
		for ( TokenFilterFactory filter : filters ) {
			stream = filter.create( stream );
		}
		return new TokenStreamComponents( source, stream );
	}

	/**
	 * @return {@code TokenizerChain(...)} listing the char filters, the tokenizer and the
	 * token filters in chain order, separated by {@code ", "}
	 */
	@Override
	public String toString() {
		final StringBuilder sb = new StringBuilder( "TokenizerChain(" );
		for ( CharFilterFactory charFilter : charFilters ) {
			sb.append( charFilter ).append( ", " );
		}
		sb.append( tokenizer );
		for ( TokenFilterFactory filter : filters ) {
			sb.append( ", " ).append( filter );
		}
		return sb.append( ')' ).toString();
	}
}