/
EphemeralDictionaries.java
133 lines (125 loc) · 4.35 KB
/
EphemeralDictionaries.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*
* Carrot2 project.
*
* Copyright (C) 2002-2020, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* https://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.language;
import java.util.Collections;
import java.util.List;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrObjectArray;
/**
* Ephemeral per-request overrides for the default {@link LanguageComponents} passed to the
* algorithm.
*
* @see #override(LanguageComponents)
* @since 4.1.0
*/
public class EphemeralDictionaries extends AttrComposite {
/**
* Additional stop word filtering dictionaries (supplying word filters that should be discarded
* from the input).
*
* <p>One or more dictionaries can be supplied. The default implementation in {@link
* DefaultDictionaryImpl} supports exact string matching and regular expression patterns.
*
* <p>REST-style example using the default implementation:
*
* <pre>{@code "wordFilters": [{
* "exact": ["word1", "word2"],
* "regexp": [
* "(?).+pattern1.+",
* "(?).+[0-9]{2}.+"
* ]
* }]}</pre>
*
* @see DefaultDictionaryImpl
* @see StopwordFilterDictionary
* @see StopwordFilter
*/
public AttrObjectArray<StopwordFilterDictionary> wordFilters =
attributes.register(
"wordFilters",
AttrObjectArray.builder(StopwordFilterDictionary.class, DefaultDictionaryImpl::new)
.label("Word filtering dictionaries.")
.defaultValue(Collections.emptyList()));
/**
* Additional label filtering dictionaries (supplying cluster label filters that should be
* discarded from the output).
*
* <p>One or more dictionaries can be supplied. The default implementation in {@link
* DefaultDictionaryImpl} supports exact string matching and regular expression patterns.
*
* <p>REST-style example using the default implementation:
*
* <pre>{@code "labelFilters": [{
* "exact": ["Cluster Label 1", "Foo Bar"],
* "regexp": [
* "(?).+pattern1.+",
* "(?).+[0-9]{2}.+"
* ]
* }]}</pre>
*
* @see DefaultDictionaryImpl
* @see LabelFilterDictionary
* @see LabelFilter
*/
public AttrObjectArray<LabelFilterDictionary> labelFilters =
attributes.register(
"labelFilters",
AttrObjectArray.builder(LabelFilterDictionary.class, DefaultDictionaryImpl::new)
.label("Cluster label filtering dictionaries.")
.defaultValue(Collections.emptyList()));
/**
* Override components of the existing {@link LanguageComponents} instance with this object's
* dictionaries.
*/
public LanguageComponents override(LanguageComponents languageComponents) {
List<StopwordFilterDictionary> wordFilterAttrs = this.wordFilters.get();
if (wordFilterAttrs != null && !wordFilterAttrs.isEmpty()) {
StopwordFilter dictFilter =
wordFilterAttrs.stream()
.map(StopwordFilterDictionary::compileStopwordFilter)
.reduce(StopwordFilter::and)
.get();
languageComponents =
languageComponents.override(
StopwordFilter.class,
(previous) ->
() -> {
StopwordFilter previousFilter = previous.get();
if (previousFilter != null) {
return previousFilter.and(dictFilter);
} else {
return dictFilter;
}
});
}
List<LabelFilterDictionary> labelFilterAttrs = this.labelFilters.get();
if (labelFilterAttrs != null && !labelFilterAttrs.isEmpty()) {
LabelFilter dictFilter =
labelFilterAttrs.stream()
.map(LabelFilterDictionary::compileLabelFilter)
.reduce(LabelFilter::and)
.get();
languageComponents =
languageComponents.override(
LabelFilter.class,
(previous) ->
() -> {
LabelFilter previousFilter = previous.get();
if (previousFilter != null) {
return previousFilter.and(dictFilter);
} else {
return dictFilter;
}
});
}
return languageComponents;
}
}