/
filtering.d
231 lines (198 loc) · 6.68 KB
/
filtering.d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
This file is part of Sambamba.
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
Sambamba is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Sambamba is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/**
Set of filters for alignments.
All share a common interface and can be easily combined.
*/
module filtering;
import std.regex;
import std.algorithm;
import std.conv;
import alignment;
import tagvalue;
import validation.alignment;
/// Common interface for all filters.
///
/// Every filter class in this module implements this interface, which is what
/// allows the combinators (AndFilter, OrFilter, NotFilter) to wrap any of them.
interface Filter {
/// Decides whether an alignment passes the filter.
///
/// Params:
///     a = alignment to examine, passed by reference
///
/// Returns: true if the alignment is accepted, false otherwise
bool accepts(ref Alignment a) const;
}
/// Filter which accepts all alignments.
///
/// It never inspects its argument and always answers true.
final class NullFilter : Filter {
/// Returns: true, regardless of the contents of `a`
bool accepts(ref Alignment a) const {
return true;
}
}
/// Validating filter.
///
/// Accepts exactly those alignments for which `isValid` returns true.
/// NOTE(review): `isValid` is presumably the function exported by the
/// imported `validation.alignment` module -- confirm.
final class ValidAlignmentFilter : Filter {
/// Returns: the result of `isValid(a)`
bool accepts(ref Alignment a) const {
return isValid(a);
}
}
/// Intersection of two filters.
///
/// An alignment is accepted only when both wrapped filters accept it.
final class AndFilter : Filter {
    private Filter _first, _second;

    /// Params:
    ///     a = filter consulted first
    ///     b = filter consulted only when `a` has accepted the alignment
    this(Filter a, Filter b) {
        _first = a;
        _second = b;
    }

    /// Returns: true if both filters accept `a`. The second filter is not
    /// evaluated when the first one rejects.
    bool accepts(ref Alignment a) const {
        if (!_first.accepts(a))
            return false;
        return _second.accepts(a);
    }
}
/// Union of two filters.
///
/// An alignment is accepted when at least one of the wrapped filters
/// accepts it.
final class OrFilter : Filter {
    private Filter _first, _second;

    /// Params:
    ///     a = filter consulted first
    ///     b = filter consulted only when `a` has rejected the alignment
    this(Filter a, Filter b) {
        _first = a;
        _second = b;
    }

    /// Returns: true if either filter accepts `a`. The second filter is not
    /// evaluated when the first one accepts.
    bool accepts(ref Alignment a) const {
        if (_first.accepts(a))
            return true;
        return _second.accepts(a);
    }
}
/// Negation of a filter.
///
/// Accepts exactly the alignments that the wrapped filter rejects.
final class NotFilter : Filter {
    private Filter _inner;

    /// Params:
    ///     a = filter whose verdict is inverted
    this(Filter a) {
        _inner = a;
    }

    /// Returns: the logical negation of the wrapped filter's verdict on `a`
    bool accepts(ref Alignment a) const {
        const bool innerVerdict = _inner.accepts(a);
        return !innerVerdict;
    }
}
/// Filter which accepts alignments that have the $(D flagname) flag set.
///
/// $(D flagname) is spliced into the code at compile time, so the filter
/// returns the value of the alignment member with that name.
final class FlagFilter(string flagname) : Filter {
    /// Returns: the value of `a.<flagname>`
    bool accepts(ref Alignment a) const {
        // Expression mixin: expands to the member access `a.<flagname>`.
        return mixin("a." ~ flagname);
    }
}
/// Filtering integer fields.
///
/// Compares one integer member of the alignment against a constant using the
/// operator $(D op), i.e. evaluates `a.<member> op value`.
///
/// Recognised field names and the alignment member each one reads:
/// "ref_id" -> ref_id, "position" -> position,
/// "mapping_quality" -> mapping_quality, "sequence_length" -> sequence_length,
/// "mate_ref_id" -> next_ref_id, "mate_position" -> next_pos,
/// "template_length" -> template_length.
final class IntegerFieldFilter(string op) : Filter {
    private long _value;
    private string _fieldname;

    /// Params:
    ///     fieldname = one of the field names listed above; it is not checked
    ///                 here, only when `accepts` is called
    ///     value     = right-hand side of the comparison
    this(string fieldname, long value) {
        _fieldname = fieldname;
        _value = value;
    }

    /// Builds, at compile time, the text of the comparison expression for
    /// the given alignment member.
    private static string comparison(string member) {
        return "a." ~ member ~ " " ~ op ~ "_value";
    }

    /// Returns: the result of comparing the selected field with the stored
    /// value.
    /// Throws: Exception if the field name given at construction is not one
    /// of the recognised names.
    bool accepts(ref Alignment a) const {
        switch (_fieldname) {
            case "ref_id":
                return mixin(comparison("ref_id"));
            case "position":
                return mixin(comparison("position"));
            case "mapping_quality":
                return mixin(comparison("mapping_quality"));
            case "sequence_length":
                return mixin(comparison("sequence_length"));
            case "mate_ref_id":
                return mixin(comparison("next_ref_id"));
            case "mate_position":
                return mixin(comparison("next_pos"));
            case "template_length":
                return mixin(comparison("template_length"));
            default:
                throw new Exception("unknown integer field '" ~ _fieldname ~ "'");
        }
    }
}
/// Filter testing whether a tag is present.
///
/// Instantiated with "!=" it accepts alignments for which `a[tagname]` is not
/// "nothing"; instantiated with "==" it accepts those for which it is
/// "nothing". Any other operator is rejected at compile time.
final class TagExistenceFilter(string op) : Filter {
    static assert(op == "==" || op == "!=");

    private string _tagname;
    private static bool _should_exist = op == "!=";

    /// Params:
    ///     tagname = name of the tag to look up
    ///     dummy   = unused. NOTE(review): presumably present so that a
    ///               `null` right-hand side selects this filter -- confirm
    ///               against the callers.
    this(string tagname, typeof(null) dummy) {
        _tagname = tagname;
    }

    /// Returns: whether the presence of the tag matches what $(D op) asks for
    bool accepts(ref Alignment a) const {
        auto tag = a[_tagname];
        if (!_should_exist)
            return tag.is_nothing;
        return !tag.is_nothing;
    }
}
/// Filtering numeric tags against an integer constant.
///
/// Looks up the tag via `a[tagname]` and evaluates `tag op value`. Both
/// integer and floating-point tag values are supported; an alignment whose
/// tag value is neither is rejected.
final class IntegerTagFilter(string op) : Filter {
    private long _value;
    private string _tagname;

    /// Params:
    ///     tagname = name of the tag to look up
    ///     value   = right-hand side of the comparison
    this(string tagname, long value) {
        _tagname = tagname;
        _value = value;
    }

    /// Returns: the result of the comparison, or false when the tag value is
    /// neither an integer nor a float
    bool accepts(ref Alignment a) const {
        auto v = a[_tagname];
        if (!v.is_integer && !v.is_float)
            return false;
        if (v.is_float) {
            // Compare in double precision. Comparing a float directly with a
            // long converts the long to float (24-bit mantissa), which rounds
            // constants larger than 2^24 in magnitude and can produce wrong
            // answers. float -> double is exact, and long -> double is exact
            // up to 2^53.
            mixin(`return cast(double)(cast(float)v) ` ~ op ~ ` cast(double)_value;`);
        } else {
            mixin(`return cast(long)v` ~ op ~ `_value;`);
        }
    }
}
/// Filtering string fields.
///
/// Compares one textual member of the alignment with a constant string using
/// the operator $(D op). Recognised field names are "read_name", "sequence"
/// and "cigar".
final class StringFieldFilter(string op) : Filter {
    private string _value;
    private string _fieldname;

    /// Params:
    ///     fieldname = "read_name", "sequence" or "cigar"; it is not checked
    ///                 here, only when `accepts` is called
    ///     value     = string to compare the field with
    this(string fieldname, string value) {
        _fieldname = fieldname;
        _value = value;
    }

    /// Returns: the result of comparing the selected field with the stored
    /// string.
    /// Throws: Exception if the field name given at construction is not one
    /// of the recognised names.
    bool accepts(ref Alignment a) const {
        switch (_fieldname) {
            case "read_name":
                return mixin("a.read_name " ~ op ~ " _value");
            case "sequence":
                // The sequence is compared through cmp(), and the sign of its
                // result is then tested with the operator.
                return mixin("cmp(a.sequence, _value) " ~ op ~ " 0");
            case "cigar":
                return mixin("a.cigarString() " ~ op ~ " _value");
            default:
                throw new Exception("unknown string field '" ~ _fieldname ~ "'");
        }
    }
}
/// Filtering string and character tags.
///
/// Looks up the tag via `a[tagname]` and compares it with a constant string
/// using the operator $(D op). A character tag is only comparable with a
/// one-character constant; tags of any other kind are rejected.
final class StringTagFilter(string op) : Filter {
    private string _value;
    private string _tagname;

    /// Params:
    ///     tagname = name of the tag to look up
    ///     value   = string to compare the tag value with
    this(string tagname, string value) {
        _tagname = tagname;
        _value = value;
    }

    /// Returns: the result of the comparison; false when the tag is neither
    /// a string nor a character, or when a character tag is compared with a
    /// constant whose length is not 1
    bool accepts(ref Alignment a) const {
        auto tag = a[_tagname];

        if (tag.is_string)
            return mixin(`cast(string)tag` ~ op ~ `_value`);

        if (!tag.is_character)
            return false;

        // Comparing a single char with a longer (or empty) string makes no
        // sense, so such a filter never matches.
        if (_value.length != 1)
            return false;

        return mixin(`cast(char)tag` ~ op ~ `_value[0]`);
    }
}
/// Filtering string fields with a regular expression.
///
/// Accepts an alignment when the selected textual field contains at least
/// one match of the pattern. Recognised field names are "read_name",
/// "sequence" and "cigar".
final class RegexpFieldFilter : Filter {
    private string _fieldname;
    private Regex!char _pattern;

    /// Params:
    ///     fieldname = "read_name", "sequence" or "cigar"; it is not checked
    ///                 here, only when `accepts` is called
    ///     pattern   = compiled regular expression to search for
    this(string fieldname, Regex!char pattern) {
        _fieldname = fieldname;
        _pattern = pattern;
    }

    /// Tells whether `text` contains a match of the stored pattern.
    private bool matchesPattern(S)(S text) const {
        // cast() strips the const qualifier that this const method puts on
        // the stored regex before it is handed to match().
        return !match(text, cast()_pattern).empty;
    }

    /// Returns: true if the selected field matches the pattern.
    /// Throws: Exception if the field name given at construction is not one
    /// of the recognised names.
    bool accepts(ref Alignment a) const {
        switch (_fieldname) {
            case "read_name":
                return matchesPattern(a.read_name);
            case "sequence":
                return matchesPattern(to!string(a.sequence));
            case "cigar":
                return matchesPattern(a.cigarString());
            default:
                throw new Exception("unknown string field '" ~ _fieldname ~ "'");
        }
    }
}
/// Filtering string tags with a regular expression.
///
/// Accepts an alignment when the tag looked up via `a[tagname]` holds a
/// string containing at least one match of the pattern. Tags that are not
/// strings are rejected.
final class RegexpTagFilter : Filter {
    private string _tagname;
    private Regex!char _pattern;

    /// Params:
    ///     tagname = name of the tag to look up
    ///     pattern = compiled regular expression to search for
    this(string tagname, Regex!char pattern) {
        _tagname = tagname;
        _pattern = pattern;
    }

    /// Returns: true if the tag is a string that matches the pattern,
    /// false otherwise
    bool accepts(ref Alignment a) const {
        auto tag = a[_tagname];
        if (tag.is_string) {
            // cast() strips the const qualifier that this const method puts
            // on the stored regex before it is handed to match().
            auto hits = match(cast(string)tag, cast()_pattern);
            return !hits.empty;
        }
        return false;
    }
}