Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 238 lines (169 sloc) 6.018 kB
98cf8af @bl0b Added csv search tool.
authored
1 #!/usr/bin/env python
2
3 from itertools import ifilter
4 from csv_parser import Csv
5 import sys
6 import re
7 from jupyLR import Scanner, Automaton
9034d98 @bl0b 3 fixes.
authored
8 from optparse import OptionParser
98cf8af @bl0b Added csv search tool.
authored
9
10
11 #
12
# Grammar of the search-expression language, handed to the jupyLR Automaton
# together with the start symbol "where" (see SE_Parser.__init__).
# Rules named p1..p4 and colset/valueset have matching handler methods on
# SE_Parser; rules prefixed with "-" have none.
# NOTE(review): the "-" prefix presumably marks a rule as transparent (not
# materialised in the AST) -- confirm against the jupyLR documentation.
SE_grammar = """
-colname
= STRING
| WHATEVER

colset
= _colset

-_colset
= _colset COMMA colname
| colname

-value
= STRING
| WHATEVER

valueset
= _valueset

-_valueset
= _valueset COMMA value
| value

-where
= p4

p1 = colset EQ valueset
| colset MATCHES valueset
| colset CONTAINS valueset

-p1 = OPEN_PAR where CLOSE_PAR

-p2 = p1
p2 = NOT p2

-p3 = p2
p3 = p2 AND p3

-p4 = p3
p4 = p3 OR p4
"""
54
55
class Predicate(object):
    """Common base class of every node in a compiled search expression.

    It carries no state or behaviour of its own; the subclasses in this
    file implement ``__call__(row)`` to test one CSV row.
    """
58
59
class ColumnPredicate(Predicate):
    """Predicate testing a set of columns against a set of values.

    Subclasses provide ``eval(c, v)``, which receives one lower-cased cell
    and one candidate value and returns a truth value.
    """

    def __init__(self, cols, values):
        """Store the column indices and the candidate values.

        cols   -- iterable of indices usable as ``row[i]``
        values -- iterable of values handed to ``eval`` one at a time
        """
        self.cols = cols
        self.values = values

    def __call__(self, row):
        """Return True if any selected cell satisfies ``eval`` for any value.

        Each selected cell is lower-cased before being tested. ``any`` is
        used instead of the Python 2-only builtin ``reduce``: it stops at
        the first hit and returns False (rather than raising TypeError)
        when ``cols`` is empty.
        """
        cells = (row[i].lower() for i in self.cols)
        return any(self.eval(cell, candidate)
                   for cell in cells
                   for candidate in self.values)
98cf8af @bl0b Added csv search tool.
authored
71
72
class Eq(ColumnPredicate):
    """Column predicate that holds when a cell equals one of the values."""

    def __init__(self, col, value):
        """Store columns and values, lower-casing every value.

        The cells are lower-cased by ``ColumnPredicate.__call__``, so
        lower-casing the values here makes the comparison case-insensitive.
        """
        ColumnPredicate.__init__(self, col, value)
        lowered = []
        for candidate in self.values:
            lowered.append(candidate.lower())
        self.values = lowered

    def eval(self, c, v):
        """Return True when cell ``c`` is exactly ``v``."""
        return c == v
98cf8af @bl0b Added csv search tool.
authored
81
82
class Contains(ColumnPredicate):
    """Column predicate that holds when a cell contains one of the values."""

    def __init__(self, col, value):
        """Store columns and values, lower-casing every value.

        The cells are lower-cased by ``ColumnPredicate.__call__``, so
        lower-casing the values here makes the search case-insensitive.
        """
        ColumnPredicate.__init__(self, col, value)
        lowered = []
        for candidate in self.values:
            lowered.append(candidate.lower())
        self.values = lowered

    def eval(self, c, v):
        """Return True when ``v`` occurs anywhere inside cell ``c``."""
        return v in c
98cf8af @bl0b Added csv search tool.
authored
91
92
class Matches(ColumnPredicate):
    """Column predicate that holds when a cell matches one of the regexes."""

    def __init__(self, col, value):
        """Store columns and compile every value as a regular expression.

        col   -- iterable of column indices
        value -- iterable of regular-expression source strings

        The base class stores the patterns in ``self.values``. The earlier
        code read and wrote a non-existent ``self.value`` attribute, which
        raised AttributeError for every ``matches`` predicate.

        Raises ``re.error`` if a pattern is not a valid regular expression.
        """
        ColumnPredicate.__init__(self, col, value)
        # Compile once here so that eval() only has to call .match().
        self.values = [re.compile(pattern, re.IGNORECASE)
                       for pattern in self.values]

    def eval(self, c, v):
        """Return True when compiled pattern ``v`` matches at the start of ``c``."""
        return v.match(c) is not None
98cf8af @bl0b Added csv search tool.
authored
101
102
class BinaryPredicate(Predicate):
    """Base class for predicates that combine two operands."""

    def __init__(self, a, b):
        """Keep the left operand as ``self.a`` and the right one as ``self.b``."""
        self.a, self.b = a, b
108
109
class And(BinaryPredicate):
    """Conjunction of two predicates."""

    def __call__(self, row):
        """Return ``a(row) and b(row)``; ``b`` is skipped when ``a`` is falsy."""
        left = self.a(row)
        if not left:
            return left
        return self.b(row)
114
115
class Or(BinaryPredicate):
    """Disjunction of two predicates."""

    def __call__(self, row):
        """Return ``a(row) or b(row)``; ``b`` is skipped when ``a`` is truthy."""
        left = self.a(row)
        if left:
            return left
        return self.b(row)
120
121
class Not(Predicate):
    """Negation of a single predicate."""

    def __init__(self, a):
        """Keep the wrapped predicate as ``self.a``."""
        self.a = a

    def __call__(self, row):
        """Return True when the wrapped predicate is falsy for ``row``."""
        outcome = self.a(row)
        if outcome:
            return False
        return True
129
130
class SE_Parser(Automaton):
    """Parser turning a query string into a tree of Predicate objects.

    The token definitions and SE_grammar are handed to the jupyLR
    Automaton base class; the methods p1..p4, colset and valueset build
    the predicate objects for the grammar rules of the same names.
    """

    def __init__(self, csv):
        """Set up the scanner, the automaton and the column lookup table.

        csv -- object exposing ``headers`` (falsy when there is no header
               row) and ``data`` (indexable; its first row is used to count
               the columns when there are no headers).
        """
        tokens = Scanner(STRING=r'"((?:\\["\tvbnr]|[^\"])*)"',
                         AND=r"\band\b",
                         OR=r"\bor\b",
                         NOT=r"\bnot\b",
                         EQ="=",
                         COMMA=",",
                         MATCHES=r"\bmatches\b",
                         CONTAINS=r"\bcontains\b",
                         OPEN_PAR="[(]",
                         CLOSE_PAR="[)]",
                         _whitespace=r"[ \r\n\t]+",
                         discard_names=["_whitespace"])
        # WHATEVER is registered in a separate call, after the other tokens.
        # NOTE(review): presumably so that keywords win over bare words --
        # confirm against the jupyLR Scanner documentation.
        tokens.add(WHATEVER=r"[^ ,\r\n\t=()]+")
        Automaton.__init__(self, "where", SE_grammar, tokens)

        # Map every accepted column designation to its 0-based index:
        # the 1-based position (as int and as str) and, when available,
        # the header text.
        lookup = {}
        if csv.headers:
            for index, header in enumerate(csv.headers):
                for key in (index + 1, str(index + 1), header):
                    lookup[key] = index
        else:
            for index in range(len(csv.data[0])):
                for key in (str(index + 1), index + 1):
                    lookup[key] = index
        self.colnames = lookup

        # Dispatch table used by validate_ast: rule name -> handler.
        self.val = {
            'p1': self.p1,
            'p2': self.p2,
            'p3': self.p3,
            'p4': self.p4,
            'colset': self.colset,
            'valueset': self.valueset,
        }

    def colset(self, ast):
        """Return the tuple of column indices named in a ``colset`` node.

        Every second child starting at index 1 is looked up in
        ``self.colnames`` by its second item; an unknown name raises
        KeyError.
        """
        indices = []
        for token in ast[1::2]:
            indices.append(self.colnames[token[1]])
        return tuple(indices)

    def valueset(self, ast):
        """Return the tuple of second items of every second child from index 1."""
        picked = []
        for token in ast[1::2]:
            picked.append(token[1])
        return tuple(picked)

    def p1(self, ast):
        """Build the column predicate selected by the operator token.

        ``ast[2][0]`` is the operator token name; ``ast[1]`` and ``ast[3]``
        are passed to the predicate class as columns and values.
        """
        predicate_classes = {
            'EQ': Eq,
            'MATCHES': Matches,
            'CONTAINS': Contains,
        }
        operator_name = ast[2][0]
        predicate_class = predicate_classes[operator_name]
        return predicate_class(ast[1], ast[3])

    def p2(self, ast):
        """Wrap the operand at ``ast[2]`` in a Not."""
        operand = ast[2]
        return Not(operand)

    def p3(self, ast):
        """Combine ``ast[1]`` and ``ast[3]`` with And."""
        left, right = ast[1], ast[3]
        return And(left, right)

    def p4(self, ast):
        """Combine ``ast[1]`` and ``ast[3]`` with Or."""
        left, right = ast[1], ast[3]
        return Or(left, right)

    def validate_ast(self, ast):
        """Dispatch ``ast`` to the handler registered for its rule name.

        ``ast[0]`` is the rule name; a name missing from ``self.val``
        raises KeyError.
        """
        handler = self.val[ast[0]]
        return handler(ast)
189
190
def search_csv(csv, query):
    """Lazily yield the rows of ``csv.data`` that satisfy ``query``.

    csv   -- object accepted by SE_Parser and exposing an iterable ``data``
    query -- search expression text

    The first result returned by the parser is used as the row predicate.
    """
    parser = SE_Parser(csv)
    results = parser(query)
    predicate = results[0]
    return (row for row in csv.data if predicate(row))
195
196
if __name__ == '__main__':
    # Command-line entry point: for every (<CSV file>, "query") pair given
    # on the command line, print the header row (if any) followed by the
    # rows matching the query.
    op = OptionParser()
    op.add_option("-s", "--separator", dest="sep",
                  help="CSV separator (default: tab)", default="\t")
    op.add_option("-n", "--no-header", dest="nh", action="store_true",
                  default=False, help="Don't use first row as column names")

    op.usage += ' [<CSV file> "query"]...'

    op.usage += """

Where query is a predicate on one or more columns in the CSV.
A column name is either its number (starting from 1) and/or the value in this
column in the first row if the -n option was not specified.
A value is anything. Use double quotes to enclose values containing spaces.

Column predicates are case-insensitive.

Column predicates are :
COLNAME contains VALUE the string VALUE appears anywhere inside
the column value.
COLNAME = VALUE the column value is exactly VALUE.
COLNAME matches VALUE VALUE is a regular expression and the
value in the column matches it.

Combinations are (A and B are any column predicate or any predicate already
mentioned):
( predicate ) sub-expression grouping
not A true if A evaluates to false
A and B true if A and B both evaluate to true
A or B true if any of A or B evaluate to true
"""

    opts, args = op.parse_args(sys.argv[1:])

    # Arguments come in (file, query) pairs. Report a dangling file name
    # through optparse instead of failing later with an IndexError.
    if len(args) % 2:
        op.error('every <CSV file> must be followed by a "query"')

    for i in range(0, len(args), 2):
        csv = Csv(args[i], sep=opts.sep, headers=not opts.nh)
        if csv.headers:
            print(opts.sep.join(csv.headers))
        for row in search_csv(csv, args[i + 1]):
            # Use the configured separator for data rows as well, so the
            # output is consistent with the header line printed above.
            print(opts.sep.join('"' + str(v) + '"' for v in row))
Something went wrong with that request. Please try again.