/
TopXFilter.java
214 lines (183 loc) · 7.14 KB
/
TopXFilter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.filter;
import de.uni_mannheim.informatik.dws.melt.matching_base.Filter;
import de.uni_mannheim.informatik.dws.melt.matching_jena.MatcherYAAAJena;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.Alignment;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.Correspondence;
import org.apache.jena.ontology.OntModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
/**
 * This filter keeps only the top X correspondences according to confidence.
 * <p>
 * The side of the alignment by which correspondences are grouped before ranking is chosen
 * through {@link TopFilterMode}:
 * <ul>
 *   <li>source-based: keep only the top X correspondences for each source node,</li>
 *   <li>target-based: keep only the top X correspondences for each target node,</li>
 *   <li>size-based: group by the smaller or the larger side of the alignment,</li>
 *   <li>source and target: union of the source-based and the target-based result.</li>
 * </ul>
 * Correspondences whose confidence is not strictly greater than the configured threshold are
 * dropped before ranking.
 */
public class TopXFilter extends MatcherYAAAJena implements Filter {

    private static final Logger LOGGER = LoggerFactory.getLogger(TopXFilter.class);

    /** Threshold used by the constructors that do not take an explicit threshold. */
    public static final double DEFAULT_THRESHOLD = 0.0;

    /** Value used for X when an invalid (smaller than 1) X is supplied. */
    public static final int DEFAULT_X = 1;

    /**
     * Filter mode used by the constructors that do not take an explicit mode, and as fallback
     * when {@code null} is passed to {@link #setFilterMode(TopFilterMode)}.
     * Kept non-final because it has always been publicly assignable.
     */
    public static TopFilterMode DEFAULT_FILTER_MODE = TopFilterMode.SMALLEST;

    /**
     * Ranking used to select the top X correspondences: highest confidence first.
     * The reversed natural order of {@link Correspondence} is only used to break ties between
     * equal confidences, so the ranking never depends on how the natural order is defined.
     */
    private static final Comparator<Correspondence> BY_CONFIDENCE_DESCENDING =
            Comparator.comparingDouble((Correspondence c) -> c.getConfidence())
                    .reversed()
                    .thenComparing(Comparator.<Correspondence>reverseOrder());

    private double threshold;
    private int x;
    private TopFilterMode filterMode;

    /**
     * Default Constructor
     * @param x Number of correspondences to keep per node. Values smaller than 1 are replaced
     *          by {@link #DEFAULT_X}.
     * @param filterMode The filter mode. {@code null} is replaced by {@link #DEFAULT_FILTER_MODE}.
     * @param threshold The desired threshold. Only correspondences with a confidence strictly
     *                  greater than this value are kept, so with 0.0d only correspondences with
     *                  a confidence of exactly 0.0 (or below) are removed.
     */
    public TopXFilter(int x, TopFilterMode filterMode, double threshold){
        setX(x);
        setThreshold(threshold);
        setFilterMode(filterMode);
    }

    /**
     * Constructor using {@link #DEFAULT_FILTER_MODE} and {@link #DEFAULT_THRESHOLD}.
     * @param x Number of correspondences to keep per node.
     */
    public TopXFilter(int x){
        this(x, DEFAULT_FILTER_MODE, DEFAULT_THRESHOLD);
    }

    /**
     * Constructor using {@link #DEFAULT_FILTER_MODE}.
     * @param x Number of correspondences to keep per node.
     * @param threshold The desired threshold (exclusive lower bound for the confidence).
     */
    public TopXFilter(int x, double threshold){
        this(x, DEFAULT_FILTER_MODE, threshold);
    }

    /**
     * Constructor using {@link #DEFAULT_THRESHOLD}.
     * @param x Number of correspondences to keep per node.
     * @param mode The filter mode.
     */
    public TopXFilter(int x, TopFilterMode mode){
        this(x, mode, DEFAULT_THRESHOLD);
    }

    /**
     * Applies {@link #filter(Alignment)} to the input alignment. The ontologies and the
     * properties are not used.
     */
    @Override
    public Alignment match(OntModel source, OntModel target, Alignment inputAlignment, Properties properties) throws Exception {
        return filter(inputAlignment);
    }

    /**
     * Filters the given alignment such that only the top X correspondences (according to
     * confidence) remain for every node of the side selected by the filter mode
     * (see {@link TopFilterMode}). The given alignment is not modified.
     * @param alignment The initial alignment.
     * @return The filtered alignment, or {@code null} if the given alignment is {@code null}.
     */
    public Alignment filter(Alignment alignment) {
        if(alignment == null){
            return null;
        }
        // NOTE(review): presumably this copies the alignment's metadata but not its
        // correspondences - confirm against the Alignment copy constructor.
        Alignment result = new Alignment(alignment, false);
        TopFilterMode mode = this.getFilterMode();
        switch (mode){
            case SOURCE:
                addTopXPerSource(alignment, result);
                break;
            case TARGET:
                addTopXPerTarget(alignment, result);
                break;
            case LARGEST:
            case SMALLEST:
                int sourceSize = getIteratorSize(alignment.getDistinctSources().iterator());
                int targetSize = getIteratorSize(alignment.getDistinctTargets().iterator());
                // On equal sizes both modes group by source.
                boolean groupBySource = (mode == TopFilterMode.LARGEST)
                        ? sourceSize >= targetSize
                        : sourceSize <= targetSize;
                if(groupBySource){
                    addTopXPerSource(alignment, result);
                } else {
                    addTopXPerTarget(alignment, result);
                }
                break;
            case SOURCE_AND_TARGET:
                // Union of both views; a node may therefore end up with more than X correspondences.
                addTopXPerSource(alignment, result);
                addTopXPerTarget(alignment, result);
                break;
        }
        return result;
    }

    /** Adds the top X correspondences of every distinct source of {@code alignment} to {@code result}. */
    private void addTopXPerSource(Alignment alignment, Alignment result){
        for(String source : alignment.getDistinctSources()){
            result.addAll(filterTopX(alignment.getCorrespondencesSource(source).iterator()));
        }
    }

    /** Adds the top X correspondences of every distinct target of {@code alignment} to {@code result}. */
    private void addTopXPerTarget(Alignment alignment, Alignment result){
        for(String target : alignment.getDistinctTargets()){
            result.addAll(filterTopX(alignment.getCorrespondencesTarget(target).iterator()));
        }
    }

    /** Counts the remaining elements of the iterator (consuming it); {@code null} counts as 0. */
    private <T> int getIteratorSize(Iterator<T> iterator){
        if (iterator == null){
            return 0;
        }
        int size = 0;
        while(iterator.hasNext()){
            iterator.next();
            size++;
        }
        return size;
    }

    /**
     * Selects, from the given correspondences, those with a confidence strictly greater than
     * the threshold and keeps the X with the highest confidence.
     * @param iterator The candidate correspondences; may be {@code null}.
     * @return A new alignment holding at most X correspondences (empty for a {@code null} iterator).
     */
    private Alignment filterTopX(Iterator<Correspondence> iterator){
        Alignment result = new Alignment();
        if(iterator == null){
            return result;
        }
        // A set is used so that duplicates delivered by the iterator are ranked only once.
        Set<Correspondence> candidates = new HashSet<>();
        while(iterator.hasNext()){
            Correspondence candidate = iterator.next();
            if(candidate.getConfidence() > this.threshold){
                candidates.add(candidate);
            }
        }
        result.addAll(candidates
                .stream()
                .sorted(BY_CONFIDENCE_DESCENDING)
                .limit(this.x)
                .collect(Collectors.toSet()));
        return result;
    }

    /**
     * @return The threshold (exclusive lower bound for the confidence of kept correspondences).
     */
    public double getThreshold() {
        return threshold;
    }

    /**
     * @param threshold The threshold; only correspondences with a confidence strictly greater
     *                  than this value are kept.
     */
    public void setThreshold(double threshold) {
        this.threshold = threshold;
    }

    /**
     * @return The maximum number of correspondences kept per node.
     */
    public int getX() {
        return x;
    }

    /**
     * @param x The maximum number of correspondences kept per node. If smaller than 1, an error
     *          is logged and {@link #DEFAULT_X} is used instead.
     */
    public void setX(int x) {
        if(x < 1){
            LOGGER.error("x cannot be less than 1. Using default: 1");
            this.x = DEFAULT_X;
            return;
        }
        this.x = x;
    }

    /**
     * @return The filter mode currently in use.
     */
    public TopFilterMode getFilterMode() {
        return filterMode;
    }

    /**
     * @param filterMode The filter mode. If {@code null}, an error is logged and
     *                   {@link #DEFAULT_FILTER_MODE} is used instead (mirrors the handling of
     *                   invalid values in {@link #setX(int)}).
     */
    public void setFilterMode(TopFilterMode filterMode) {
        if(filterMode == null){
            LOGGER.error("filterMode cannot be null. Using default: {}", DEFAULT_FILTER_MODE);
            this.filterMode = DEFAULT_FILTER_MODE;
            return;
        }
        this.filterMode = filterMode;
    }

    /**
     * Filter mode.
     */
    public enum TopFilterMode {
        /**
         * Keep the top X correspondences for the source.
         */
        SOURCE,
        /**
         * Keep the top X correspondences for the target.
         */
        TARGET,
        /**
         * Keep the top X correspondences for the source and for the target.
         * Note that this may lead to more than X correspondences for a single source/target element in some cases.
         */
        SOURCE_AND_TARGET,
        /**
         * Keep the top X correspondences for the smaller side in the alignment
         * (the source side if both sides have the same number of distinct elements).
         */
        SMALLEST,
        /**
         * Keep the top X correspondences for the larger side in the alignment
         * (the source side if both sides have the same number of distinct elements).
         */
        LARGEST;
    }
}